In [1]:
#gemi koltuk reservazyon ücreti modeli

In [2]:
#Senaryo: Bir gemi sirketinde calisiyorsunuz, sizden koltuk reservazyonu icin fiyat belirlenmesi icin bir model 
#yapmaniz istendi, burada fiyat öncelikle kullanici profiline ve sonrasinda koltuk konumunua göre belirlenecek 
# bunu yaparken titanic veri setini baz alip, yolcu profilinden, ödeme istedigini baz alan bir sistem yapabilirsiniz, 
#farkli görüsleri olan varsa onu da uygulayabilir

![stemming1.png](https://store.donanimhaber.com/50/ee/7f/50ee7fc514d6253b1d5482afde7e9e8a.png)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# titanic veri seti 

df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')
df.head()

In [None]:
df.shape

In [None]:
df.columns

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.describe(include='object')

In [None]:
df.isnull().sum()

In [None]:
df.duplicated().sum()

In [None]:
numerical = df.select_dtypes('number')
numerical

In [None]:
print("Number of Uniques:\n", df.select_dtypes('number').nunique(), '\n', sep='')

In [None]:
pd.DataFrame(df.select_dtypes('number').agg(['min', 'mean', 'median', 'max']))

In [None]:
categorical = df.select_dtypes('object')
categorical

In [None]:
print("Number of Uniques:\n", df.select_dtypes('object').nunique(),'\n', sep='')

## Numerical Columns

In [None]:
numerical.columns

In [None]:
plt.figure(figsize=(12, 10))
sns.heatmap(numerical.corr(), annot=True);

**Fare**

In [None]:
print('Range:', np.min(df['Fare']), '-', np.max(df['Fare']))
print('---'*8)
print('Nunique:', df['Fare'].nunique())
print('---'*8)

df.Fare.describe()

In [None]:
sns.boxplot(data = df, x = 'Fare');

In [None]:
sns.kdeplot(data = df, x = 'Fare', fill=True);

In [None]:
sns.histplot(data=df, x = 'Fare', bins=25);

**PassengerId**

In [None]:
print('Range:', np.min(df['PassengerId']), '-', np.max(df['PassengerId']))
print('---'*8)
print('Nunique:', df['PassengerId'].nunique())
print('---'*8)

df.PassengerId.describe()

In [None]:
# 891 tane unique PassengerId var. Her yolcuya özel. Analizlerde işe yaramaz. Düşülebilir

**Survived**

In [None]:
print('Range:', np.min(df['Survived']), '-', np.max(df['Survived']))
print('---'*8)
print('Nunique:', df['Survived'].nunique())
print('---'*8)

df.Survived.describe()

In [None]:
df.Survived.value_counts(dropna=False)

In [None]:
sns.countplot(data=df, x='Survived');

In [None]:
df.groupby('Survived')['Fare'].agg(['min', 'mean', 'median', 'max'])

In [None]:
sns.kdeplot(data = df, x = 'Fare', hue = "Survived", fill=True);

In [None]:
# Survived sütunu bilet fiyatının belirlenmesinde önemli değil. 
# Ancak, kimler hayatta kalmış? 
# Hayatta kalma nedenlerinin belirlenmesinde anahtar sütun.
# Yukarıdaki tabloya göre yüksek bilet fiyatı ödeyenler düşük bilet bedeli ödeyenlere göre daha fazla hayatta kalmış.
# 

**Pclass**

pclass ->Ticket class -> 1 = 1st, 2 = 2nd, 3 = 3rd

In [None]:
print('Range:', np.min(df['Pclass']), '-', np.max(df['Pclass']))
print('---'*8)
print('Nunique:', df['Pclass'].nunique())
print('---'*8)

df.Pclass.describe()

In [None]:
df.Pclass.value_counts(dropna=False)

In [None]:
sns.countplot(data=df, x='Pclass');

In [None]:
df.groupby('Pclass')['Fare'].agg(['min', 'mean', 'median', 'max'])

In [None]:
# Pclass bilet fiyatı belirlemesinde önemli bir sütun.
# 1'inci class ın 2'nci class a, 2'nci  class ın ise 3'üncü class a göre bilet fiyatları daha yüksek

**Age**

In [None]:
print('Range:', np.min(df['Age']), '-', np.max(df['Age']))
print('---'*8)
print('Nunique:', df['Age'].nunique())
print('---'*8)

df.Age.describe()

In [None]:
df.Age.value_counts(dropna=False)

In [None]:
df.Age.isnull().sum()

In [None]:
sns.boxplot(data = df, x = 'Age');

In [None]:
sns.kdeplot(data = df, x = 'Age', fill=True);

In [None]:
sns.histplot(data=df, x = 'Age', bins=10);

In [None]:
print(f'Age mean:{df.Age.mean()}')
print(f'Age median:{df.Age.median()}')

In [None]:
df.groupby('Pclass').Age.median()

In [None]:
df.groupby(['Pclass', 'Sex']).Age.median()

In [None]:
df.groupby(['Pclass', 'Sex']).Age.transform("median")

In [None]:
df['Age'] = df['Age'].fillna(df.groupby(['Pclass', 'Sex']).Age.transform("median"))

In [None]:
df.isnull().sum()

In [None]:
sns.scatterplot(data=df, x='Age', y='Fare');

In [None]:
sns.scatterplot(data=df, x='Age', y='Fare', hue='Pclass');

**SibSp**

- sibsp: The dataset defines family relations in this way...
- Sibling = brother, sister, stepbrother, stepsister
- Spouse = husband, wife (mistresses and fiancés were ignored)

In [None]:
print('Range:', np.min(df['SibSp']), '-', np.max(df['SibSp']))
print('---'*8)
print('Nunique:', df['SibSp'].nunique())
print('---'*8)

df.SibSp.describe()

In [None]:
df.SibSp.value_counts(dropna=False)

In [None]:
sns.countplot(data=df, x='SibSp');

In [None]:
df.groupby('SibSp')['Fare'].agg(['min', 'mean', 'median', 'max'])

**Parch**

- parch: The dataset defines family relations in this way...
- Parent = mother, father
- Child = daughter, son, stepdaughter, stepson
- Some children travelled only with a nanny, therefore parch=0 for them.

In [None]:
print('Range:', np.min(df['Parch']), '-', np.max(df['Parch']))
print('---'*8)
print('Nunique:', df['Parch'].nunique())
print('---'*8)

df.Parch.describe()

In [None]:
df.Parch.value_counts(dropna=False)

In [None]:
sns.countplot(data=df, x='Parch');

In [None]:
df.groupby('Parch')['Fare'].agg(['min', 'mean', 'median', 'max'])

## Categorical Columns

In [None]:
categorical.columns

**Sex**

In [None]:
print('Nunique:', df['Sex'].nunique())
print('---'*8)

df.Sex.value_counts(dropna=False)

In [None]:
print(df.groupby('Sex')['Fare'].mean())
df.groupby('Sex')['Fare'].mean().plot(kind='bar');

In [None]:
df.groupby('Sex')['Fare'].agg(['min', 'mean', 'median', 'max'])

**Ticket**

In [None]:
print('Nunique:', df['Ticket'].nunique())
print('---'*8)

df.Ticket.value_counts(dropna=False)

**Cabin**

In [None]:
print('Nunique:', df['Cabin'].nunique())
print('---'*8)

df.Cabin.value_counts(dropna=False)

In [None]:
df['Cabin_V2'] = df['Cabin'].str[0]

In [None]:
df['Cabin_V2'].value_counts(dropna=False)

In [None]:
df.groupby('Cabin_V2')['Fare'].agg(['min', 'mean', 'median', 'max', 'nunique'])

In [None]:
df.drop(['Cabin','Cabin_V2'], axis=1, inplace=True)
df.shape

**Embarked**

- Port of Embarkation	C = Cherbourg, Q = Queenstown, S = Southampton

In [None]:
print('Nunique:', df['Embarked'].nunique())
print('---'*8)

df.Embarked.value_counts(dropna=False)

In [None]:
df.dropna(inplace = True)
df.shape

In [None]:
sns.countplot(data=df, x='Embarked');

In [None]:
df.groupby('Embarked')['Fare'].agg(['min', 'mean', 'median', 'max'])

In [None]:
df.to_csv("cleaned_titanic.csv", index=False)
pd.read_csv("cleaned_titanic.csv")

In [72]:
def calculate_fare(start_station, end_station, is_round_trip=False, is_student=False, fare_per_station=75):
    route = ['ankara', 'eskişehir', 'bilecik', 'sakarya', 'kocaeli', 'istanbul']

    try:
        start_index = route.index(start_station.lower())
        end_index = route.index(end_station.lower())

        # Calculate the number of stations traveled
        num_stations = abs(end_index - start_index)

        # Minimum fare is 75 for each shortest route
        base_fare = num_stations * fare_per_station

        # Apply a 10% discount for round-trip tickets
        round_trip_discount = 0.9 if is_round_trip else 1

        # Apply a 25% discount for students
        student_discount = 0.75 if is_student else 1

        # Calculate the final fare
        fare = base_fare * round_trip_discount * student_discount

        return fare

    except ValueError:
        # Handle cases where the entered station is not in the route
        return "Invalid station. Please enter a valid station from the route."

# Example usage:
start_station = input("Enter the starting station: ")
end_station = input("Enter the destination station: ")
round_trip_option = input("Is this a round trip? (yes/no): ").lower()
student_option = input("Is the passenger a student? (yes/no): ").lower()

is_round_trip = round_trip_option == 'yes'
is_student = student_option == 'yes'

fare_result = calculate_fare(start_station, end_station, is_round_trip, is_student)

if isinstance(fare_result, str):
    print(fare_result)
else:
    trip_type = "round trip" if is_round_trip else "one way"
    student_status = "student" if is_student else "non-student"
    print(f"The fare for the {trip_type} journey from {start_station} to {end_station} for a {student_status} is {fare_result}.")



Enter the starting station: ankara
Enter the destination station: istanbul
Is this a round trip? (yes/no): no
Is the passenger a student? (yes/no): no
The fare for the one way journey from ankara to istanbul for a non-student is 375.


In [None]:
# gemi koltuk reservazyon ücreti modeli

# burada fiyat öncelikle kullanici profiline
# sonrasinda koltuk konumunua göre belirlenecek 
# bunu yaparken titanic veri setini baz alip, yolcu profilinden, ödeme istedigini baz alan bir sistem yapabilirsiniz, 
#farkli görüsleri olan varsa onu da uygulayabilir

In [76]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


# Model için kullanılacak özelliklerin seçilmesi
features = ['Pclass', 'Sex', 'Age']

# Bağımsız değişkenlerin ve bağımlı değişkenin seçilmesi
X = df[features]
y = df['Fare']

# Kategorik değişkenleri sayısala dönüştürme (One-Hot Encoding)
X = pd.get_dummies(X, columns=['Sex'], drop_first=True)

# Eğitim ve test veri setlerinin oluşturulması
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modelin oluşturulması ve eğitilmesi
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Modelin performansının değerlendirilmesi
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

# Kullanıcı profili ve rezervasyon bilgilerini alarak ücreti tahmin etme fonksiyonu
def predict_fare(pclass, sex, age):
    # Kullanıcı girişini modele uygun formata getirme
    input_data = {'Pclass': [pclass], 'Sex_male': [sex.lower() == 'male'], 'Age': [age]}
    input_df = pd.DataFrame(data=input_data)

    # Modeli kullanarak ücret tahmini
    fare_prediction = model.predict(input_df)
    return fare_prediction[0]

# Örnek kullanım
user_pclass = int(input("Yolcu sınıfını girin (1, 2, 3): "))
user_sex = input("Yolcu cinsiyetini girin (male/female): ")
user_age = float(input("Yolcu yaşını girin: "))

predicted_fare = predict_fare(user_pclass, user_sex, user_age)
print(f"Tahmini rezervasyon ücreti: {predicted_fare}")

Mean Absolute Error: 19.151399527452515
Yolcu sınıfını girin (1, 2, 3): 2
Yolcu cinsiyetini girin (male/female): male
Yolcu yaşını girin: 55


ValueError: The feature names should match those that were passed during fit.
Feature names must be in the same order as they were in fit.
