# Gerekli Kütüphaneler

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Veri Okuma

In [None]:
# Load the Titanic training CSV. `meta_data` keeps the frame as read from disk;
# `data` is an independent copy that the later cleaning cells work on.
meta_data = pd.read_csv("/kaggle/input/titanic/train.csv")
data = meta_data.copy()
# Preview the first rows.
data.head()

In [None]:
# Load the Titanic test CSV. `meta_test` keeps the frame as read from disk;
# `test` is an independent copy that the later cleaning cells work on.
meta_test = pd.read_csv("/kaggle/input/titanic/test.csv")
test = meta_test.copy()
# Preview the first rows.
test.head()

In [None]:
# Print the column/dtype/non-null summary of the training frame and then of
# the test frame, separated by blank lines.
data.info()
print("\n")
test.info()

In [None]:
# Overview of the training frame: dtypes, shape, descriptive statistics and
# the number of missing values per column.
# DataFrame.info() writes its report to stdout and returns None, so it is
# called on its own line; passing it to print() would print the word "None".
print("Veri tipleri \n")
data.info()
print("Verinin boyutu :",data.shape)
print("Verinin skalaları \n",data.describe())
print("Verideki boş satır sayıları \n", data.isnull().sum())

# Veri Temizleme ve Dönüştürme İşlemleri


## Çok fazla boş veri içeren verileri silme

In [None]:
def drops(data):
    """Remove the "Ticket" and "Cabin" columns from *data* in place.

    The frame passed in is modified directly and nothing is returned.
    """
    for column in ("Ticket", "Cabin"):
        data.drop(columns=column, inplace=True)

In [None]:
# Drop the "Ticket" and "Cabin" columns from both frames (in place) and print
# the remaining per-column missing-value counts after each call.
drops(data)
print(data.isnull().sum())
print("----")
drops(test)
print(test.isnull().sum())

In [None]:
# Boxen plot of Age (x) for each Embarked value (y), split by Sex (hue).
sns.catplot(data=data, x="Age", y="Embarked", hue="Sex", kind="boxen")

In [None]:
# Light-grey violin plot of Age per Sex with no inner markers, followed by a
# swarm plot of the individual observations for the same variables.
# NOTE(review): the swarm plot is presumably meant to be drawn on top of the
# violin axes created just above — confirm in the rendered output.
sns.catplot(data=data, x="Age", y="Sex", kind="violin", color=".9", inner=None)
sns.swarmplot(data=data, x="Age", y="Sex", size=3)

In [None]:
# Bar plot of the mean of Survived per Sex, one facet per Embarked value.
g = sns.catplot(
    data=data, x="Sex", y="Survived", col="Embarked",
    kind="bar", height=4, aspect=.6,
)
g.set_axis_labels("", "Survival Rate")
# NOTE(review): the tick labels are hard-coded; this assumes the Sex categories
# are drawn in the order male, female — confirm against the plot.
g.set_xticklabels(["Male", "Female"])
g.set_titles("{col_name} {col_var}")
# Survived is passed as y, so the axis is limited to the 0..1 range.
g.set(ylim=(0, 1))
g.despine(left=True)

## Kategorik verilerin analize hazır hale getirilmesi

In [None]:
# Frequency of each Embarked value in the training frame.
data["Embarked"].value_counts()

In [None]:
# Print the distinct values of the two categorical columns that are one-hot
# encoded later on.
print("Embarked \n",data["Embarked"].unique())
print("Sex \n",data["Sex"].unique())

In [None]:
# Fill missing Embarked values in the training frame with "S" and missing Fare
# values in the test frame with the mean Fare of the test frame.
# The filled column is assigned back instead of calling fillna(inplace=True)
# on data["..."]: that chained in-place call operates on an intermediate
# Series and does not reliably update the parent frame under pandas
# Copy-on-Write.
data["Embarked"] = data["Embarked"].fillna("S")
test["Fare"] = test["Fare"].fillna(test["Fare"].mean())
print("Embarked \n",data["Embarked"].unique())

In [None]:
# Missing-value count per column of the test frame.
test.isnull().sum()

In [None]:
# Missing-value count per column of the training frame.
data.isnull().sum()

In [None]:
def age_fill_(data):
    """Fill missing "Age" values in place with the mean age of the row's group.

    Groups are the ("Pclass", "Sex") combinations, and each group mean is
    computed from the non-missing ages found in *data* itself.

    Args:
        data: DataFrame with "Pclass", "Sex" and "Age" columns; its "Age"
            column is overwritten.

    Returns:
        None.
    """
    # transform("mean") broadcasts every group's mean back onto its own rows,
    # so the fill is a single vectorised operation instead of a Python-level
    # row-by-row apply with a lookup per row.
    group_mean_age = data.groupby(["Pclass", "Sex"])["Age"].transform("mean")
    data["Age"] = data["Age"].fillna(group_mean_age)
# Fill missing ages in each frame; age_fill_ computes the group means from the
# frame it is given, so the two frames are filled independently.
age_fill_(data)
age_fill_(test)

In [None]:
# Missing-value count per column of the test frame after the age fill.
test.isnull().sum()

In [None]:
def convert_datas(data):
    """Return *data* with "Embarked" and "Sex" replaced by indicator columns.

    For each of the two features, one indicator column per distinct value is
    appended (named after the value itself) and the source column is removed.
    The frame passed in is left unchanged; a new frame is returned.
    """
    for feature in ("Embarked", "Sex"):
        indicators = pd.get_dummies(data[feature])
        data = data.join(indicators).drop(columns=feature)
    return data
# One-hot encode "Embarked" and "Sex" in the training frame and preview the
# first ten rows.
data = convert_datas(data)
data.head(10)

In [None]:
# One-hot encode "Embarked" and "Sex" in the test frame and preview the first
# ten rows.
test = convert_datas(test)
test.head(10)

In [None]:
# Overview of the training frame after encoding: dtypes, shape, descriptive
# statistics and the number of missing values per column.
# DataFrame.info() writes its report to stdout and returns None, so it is
# called on its own line; passing it to print() would print the word "None".
print("Veri tipleri \n")
data.info()
print("Verinin boyutu :",data.shape)
print("Verinin skalaları \n",data.describe())
print("Verideki boş satır sayıları \n", data.isnull().sum())

In [None]:
import re
def get_title(name):
    """Extract the honorific (e.g. "Mr", "Miss") from a passenger name.

    The title is the first run of ASCII letters that is preceded by a space
    and immediately followed by a period.

    Args:
        name: Passenger name such as "Braund, Mr. Owen Harris".

    Returns:
        The title without the trailing period, or "" when no title is found
        or *name* is not a string (for example a missing value).
    """
    if not isinstance(name, str):
        return ""
    # Raw string: "\." inside a normal literal is an invalid escape sequence,
    # which newer Python versions warn about.
    title_search = re.search(r" ([A-Za-z]+)\.", name)
    return title_search.group(1) if title_search else ""

In [None]:
# Build the "Title" column of the training frame from "Name": extract the
# honorific, collapse the listed uncommon titles into "Rare", map the
# alternative spellings onto "Miss"/"Mrs", then drop "Name".
data['Title'] = (
    data['Name']
    .apply(get_title)
    .replace(['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major',
              'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    .replace({'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'})
)
data.drop(columns='Name', inplace=True)

In [None]:
# One-hot encode "Title" in the training frame: append one indicator column
# per distinct title, then remove the original column.
title_col = pd.get_dummies(data["Title"])
data = data.join(title_col)
data.drop("Title", axis=1, inplace=True)

In [None]:
# Build the "Title" column of the test frame from "Name": extract the
# honorific, collapse the listed uncommon titles into "Rare", map the
# alternative spellings onto "Miss"/"Mrs", then drop "Name".
test['Title'] = (
    test['Name']
    .apply(get_title)
    .replace(['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major',
              'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    .replace({'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'})
)
test.drop(columns='Name', inplace=True)

In [None]:
# One-hot encode "Title" in the test frame: append one indicator column per
# distinct title, then remove the original column.
# NOTE(review): the indicator columns come from the titles present in this
# frame only — confirm they match the training frame's columns.
title_col = pd.get_dummies(test["Title"])
test = test.join(title_col)
test.drop("Title", axis=1, inplace=True)

In [None]:
# Display the processed test frame.
test

In [None]:
# Display the processed training frame.
data

## Aykırı verilerin temizlenmesi

In [None]:
# Box plot of Age in the training frame before outlier removal.
sns.boxplot(data=data["Age"])
plt.show()

In [None]:
# Box plot of Fare in the training frame before outlier removal.
sns.boxplot(data=data["Fare"])
plt.show()

In [None]:
def remove_outliers(data, features=("Age", "Fare"), whisker=1.5):
    """Return a copy of *data* without the rows that are IQR outliers.

    For every column in *features*, the bounds ``q1 - whisker * IQR`` and
    ``q3 + whisker * IQR`` are computed on the full input. A row is dropped
    when any of those columns lies outside its bounds. Missing values never
    count as outliers.

    Args:
        data: Input DataFrame; it is not modified.
        features: Names of the columns to screen. Defaults to
            ("Age", "Fare").
        whisker: Multiplier applied to the inter-quartile range. Defaults
            to 1.5.

    Returns:
        A new DataFrame with the outlier rows removed and a fresh
        0..n-1 index.
    """
    is_outlier = np.zeros(len(data), dtype=bool)
    for feature in features:
        column = data[feature]
        q1 = column.quantile(0.25)
        q3 = column.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - whisker * iqr
        upper_bound = q3 + whisker * iqr
        is_outlier |= ((column < lower_bound) | (column > upper_bound)).to_numpy()
    # Select by boolean mask instead of treating index labels as positions, so
    # the function also works on frames whose index is not 0..n-1.
    return data.loc[~is_outlier].reset_index(drop=True)

In [None]:
# Remove Age/Fare outlier rows from the training frame only; the test frame is
# not passed through remove_outliers.
data = remove_outliers(data)

In [None]:
# Box plot of Age in the training frame after outlier removal.
sns.boxplot(data=data["Age"])
plt.show()

In [None]:
# Box plot of Fare in the training frame after outlier removal.
sns.boxplot(data=data["Fare"])
plt.show()

In [None]:
# Print both processed frames for a visual check.
print(data)
print(test)

In [None]:
# Pairwise correlation matrix of the training frame; print each column's
# correlation with "Survived", highest first.
corr = data.corr()
print(corr['Survived'].sort_values(ascending=False))

In [None]:
# Annotated heatmap of the training frame's correlation matrix on a 9x9 inch
# figure. The cell-separator width is passed through heatmap's documented
# `linewidths` parameter (the original used the undocumented `linewidth`).
f, ax = plt.subplots(figsize=(9,9))
sns.heatmap(data.corr(), annot=True, linewidths=5, ax=ax)

In [None]:
# Histograms of the training frame's columns on one 18x12 inch figure.
data.hist(figsize=(18,12))
plt.show()

# Ayırma, model oluşturma işlemleri

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Separate the features (every column except "Survived") from the target.
# NOTE(review): no identifier column is dropped in the cells above, so X
# presumably still contains "PassengerId" — confirm that is intended.
X = data.drop("Survived", axis=1)
y = data["Survived"]
print(X)
print(y)

In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size=0.2, random_state=2)

In [None]:
# Shapes of the full feature matrix and of its train / hold-out parts.
print(X.shape, X_train.shape, X_test.shape)

In [None]:
# Fit a logistic-regression classifier with scikit-learn's default settings on
# the training split.
# NOTE(review): the features are not scaled in the cells above, so the solver
# may not converge within the default iteration limit — check for a
# ConvergenceWarning when this cell runs.
model = LogisticRegression()
model.fit(X_train, Y_train)

In [None]:
# Predict on the rows the model was trained on and print the predictions.
X_train_prediction = model.predict(X_train)
print(X_train_prediction)

In [None]:
# Accuracy on the training split (fraction of predictions equal to Y_train).
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data : ', training_data_accuracy)

In [None]:
# Predict on the held-out split (X_test from train_test_split, not the Kaggle
# `test` frame).
X_test_prediction = model.predict(X_test)

In [None]:
# Print the predictions for the held-out split.
print(X_test_prediction)

In [None]:
# Accuracy on the held-out split (fraction of predictions equal to Y_test).
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of test data : ', test_data_accuracy)