In [76]:
# importing libraries

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [77]:
# loading the dataset

df = pd.read_csv(r'D:\Excelr\Data_Science _Course\Code\DATASET\Titanic-Dataset.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [78]:
# to drop the unwanted columns and save it as original dataset

df = df.drop(['PassengerId','Name','Ticket'],axis=1)
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,0,3,male,22.0,1,0,7.2500,,S
1,1,1,female,38.0,1,0,71.2833,C85,C
2,1,3,female,26.0,0,0,7.9250,,S
3,1,1,female,35.0,1,0,53.1000,C123,S
4,0,3,male,35.0,0,0,8.0500,,S
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,,S
887,1,1,female,19.0,0,0,30.0000,B42,S
888,0,3,female,,1,2,23.4500,,S
889,1,1,male,26.0,0,0,30.0000,C148,C


In [79]:
# to check the type of data and missing values

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    object 
 3   Age       714 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
 6   Fare      891 non-null    float64
 7   Cabin     204 non-null    object 
 8   Embarked  889 non-null    object 
dtypes: float64(2), int64(4), object(3)
memory usage: 62.8+ KB


In [80]:
# to get the total null value

df.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [81]:
# to get the percentage value

df.isnull().sum()/len(df)*100

Survived     0.000000
Pclass       0.000000
Sex          0.000000
Age         19.865320
SibSp        0.000000
Parch        0.000000
Fare         0.000000
Cabin       77.104377
Embarked     0.224467
dtype: float64

In [82]:
# dropping the cabin column due to most the value are null values

df = df.drop(['Cabin'],axis=1)

In [83]:
# to fill the null value in Age data
# here we take mean first , but it does not perform well in training and testing data. so we take median of the age to fill the null value

df['Age'] = df['Age'].fillna(round(df['Age'].median()))

In [84]:
# to get the count of element in the Embarked data

df['Embarked'].value_counts()

Embarked
S    644
C    168
Q     77
Name: count, dtype: int64

In [85]:
# to fill the null value in Embarked data with the mode elements

df['Embarked'] = df['Embarked'].fillna('S')

In [86]:
df.isnull().sum()/len(df)*100

Survived    0.0
Pclass      0.0
Sex         0.0
Age         0.0
SibSp       0.0
Parch       0.0
Fare        0.0
Embarked    0.0
dtype: float64

In [87]:
# to convert the textual data in Sex and Embarked column using feature enocoding (Here we use One Hot Encoding)

df = pd.get_dummies(data=df,columns=['Sex','Embarked'],dtype=int,drop_first=True)

In [88]:
df

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.2500,1,0,1
1,1,1,38.0,1,0,71.2833,0,0,0
2,1,3,26.0,0,0,7.9250,0,0,1
3,1,1,35.0,1,0,53.1000,0,0,1
4,0,3,35.0,0,0,8.0500,1,0,1
...,...,...,...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000,1,0,1
887,1,1,19.0,0,0,30.0000,0,0,1
888,0,3,28.0,1,2,23.4500,0,0,1
889,1,1,26.0,0,0,30.0000,1,0,0


In [89]:
# assigning x data and y data

x = df.drop(['Survived'],axis=1)
y = df['Survived']

In [90]:
x

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,3,22.0,1,0,7.2500,1,0,1
1,1,38.0,1,0,71.2833,0,0,0
2,3,26.0,0,0,7.9250,0,0,1
3,1,35.0,1,0,53.1000,0,0,1
4,3,35.0,0,0,8.0500,1,0,1
...,...,...,...,...,...,...,...,...
886,2,27.0,0,0,13.0000,1,0,1
887,1,19.0,0,0,30.0000,0,0,1
888,3,28.0,1,2,23.4500,0,0,1
889,1,26.0,0,0,30.0000,1,0,0


In [91]:
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [92]:
# to split the data into training data testing data

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

In [93]:
x_train

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
689,1,15.0,0,1,211.3375,0,0,1
134,2,25.0,0,0,13.0000,1,0,1
399,2,28.0,0,0,12.6500,0,0,1
463,2,48.0,0,0,13.0000,1,0,1
666,2,25.0,0,0,13.0000,1,0,1
...,...,...,...,...,...,...,...,...
737,1,35.0,0,0,512.3292,1,0,0
110,1,47.0,0,0,52.0000,1,0,1
773,3,28.0,0,0,7.2250,1,0,0
626,2,57.0,0,0,12.3500,1,1,0


In [94]:
x_test

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
250,3,28.0,0,0,7.2500,1,0,1
793,1,28.0,0,0,30.6958,1,0,0
600,2,24.0,2,1,27.0000,0,0,1
435,1,14.0,1,2,120.0000,0,0,1
465,3,38.0,0,0,7.0500,1,0,1
...,...,...,...,...,...,...,...,...
333,3,16.0,2,0,18.0000,1,0,1
450,2,36.0,1,2,27.7500,1,0,1
719,3,33.0,0,0,7.7750,1,0,1
702,3,18.0,0,1,14.4542,0,0,0


In [95]:
y_train

689    1
134    0
399    1
463    0
666    0
      ..
737    1
110    0
773    0
626    0
297    0
Name: Survived, Length: 712, dtype: int64

In [96]:
y_test

250    0
793    0
600    1
435    1
465    0
      ..
333    0
450    0
719    0
702    0
750    1
Name: Survived, Length: 179, dtype: int64

In [97]:
# to get the shape of these data

x_train.shape

(712, 8)

In [98]:
x_test.shape

(179, 8)

In [99]:
len(y_train)

712

In [100]:
len(y_test)

179

In [101]:
# assigning logistic regression to a variable

lr = LogisticRegression()
lr

In [102]:
lr.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [103]:
# to get the m value or coefficent

lr.coef_

array([[-1.09509967e+00, -4.20708950e-02, -3.70681666e-01,
        -3.16897999e-02,  2.56219018e-03, -2.58427158e+00,
         1.06066582e-01, -3.56854293e-01]])

In [104]:
# to get the intercept 

lr.intercept_

array([5.3337926])

In [105]:
# to get the predicted y_train and y_test

y_pred_train = lr.predict(x_train)
y_pred_test = lr.predict(x_test)

In [106]:
y_pred_train

array([1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,

In [107]:
y_pred_test

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1])

In [108]:
# to get the classification report including accuracy , precision , f1 , recall values for the training data

print(classification_report(y_train,y_pred_train))

              precision    recall  f1-score   support

           0       0.82      0.85      0.83       422
           1       0.77      0.73      0.75       290

    accuracy                           0.80       712
   macro avg       0.79      0.79      0.79       712
weighted avg       0.80      0.80      0.80       712



In [109]:
# to get the classification report including accuracy , precision , f1 , recall values for the testing data

print(classification_report(y_test,y_pred_test))

              precision    recall  f1-score   support

           0       0.89      0.76      0.82       127
           1       0.57      0.77      0.66        52

    accuracy                           0.77       179
   macro avg       0.73      0.77      0.74       179
weighted avg       0.80      0.77      0.77       179



In [110]:
# to get the column names

x.columns

Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_male', 'Embarked_Q',
       'Embarked_S'],
      dtype='object')

In [111]:
df

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.2500,1,0,1
1,1,1,38.0,1,0,71.2833,0,0,0
2,1,3,26.0,0,0,7.9250,0,0,1
3,1,1,35.0,1,0,53.1000,0,0,1
4,0,3,35.0,0,0,8.0500,1,0,1
...,...,...,...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000,1,0,1
887,1,1,19.0,0,0,30.0000,0,0,1
888,0,3,28.0,1,2,23.4500,0,0,1
889,1,1,26.0,0,0,30.0000,1,0,0


In [112]:
# to get input from the user
for i in range(2):
        Pclass = int(input("Enter your class section : "))
        Age = int(input("Enter your Age : "))
        SibSp = int(input("Enter your number of Siblings or spouse : "))
        Parch = int(input("Enter your number of children or parents : "))
        Fare = float(input("Enter your Fare : "))
        Sex = input("Enter your Sex[M/F] : ").lower()
        Embarked_Q = input("Enter you're going to QueensTown[Y/N] : ").lower()
        Embarked_S = input("Enter you're going to Southampton[Y/N] : ").lower()
        
        if Sex == 'm':
            Sex = 1
        else : 
            Sex = 0
        
        if Embarked_Q == 'y':
            Embarked_Q = 1
        else :
            Embarked_Q = 0
        
        if Embarked_S == 'y':
            Embarked_S = 1
        else :
            Embarked_S = 0
        
        user_input = np.array([Pclass,Age,SibSp,Parch,Fare,Sex,Embarked_Q,Embarked_S]).reshape(1,-1)
        prediction = lr.predict(user_input)
        if prediction == 0:
            print("RIP! , You will not Survive")
        else :
            print("Hooray! , You will Survive")

Enter your class section :  2
Enter your Age :  30
Enter your number of Siblings or spouse :  0
Enter your number of children or parents :  1
Enter your Fare :  20
Enter your Sex[M/F] :  m
Enter you're going to QueensTown[Y/N] :  y
Enter you're going to Southampton[Y/N] :  n




RIP! , You will not Survive


Enter your class section :  1
Enter your Age :  15
Enter your number of Siblings or spouse :  2
Enter your number of children or parents :  2
Enter your Fare :  79.3
Enter your Sex[M/F] :  f
Enter you're going to QueensTown[Y/N] :  n
Enter you're going to Southampton[Y/N] :  y


Hooray! , You will Survive


