### Linear Regression

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
train_data = pd.read_csv('linear-regression/train.csv')
test_data = pd.read_csv('linear-regression/test.csv')

In [None]:
print(train_data.head())
print('\nShape of training data :',train_data.shape)
print('\nShape of testing data :',test_data.shape)

In [5]:
train_x = train_data.drop(columns=['Item_Outlet_Sales'],axis=1)
train_y = train_data['Item_Outlet_Sales']

In [6]:
test_x = test_data.drop(columns=['Item_Outlet_Sales'],axis=1)
test_y = test_data['Item_Outlet_Sales']

In [7]:
model = LinearRegression()

In [8]:
model.fit(train_x, train_y)

LinearRegression()

In [None]:
print('\nCoefficient of model :', model.coef_)

# intercept of the model
print('\nIntercept of model',model.intercept_)

In [10]:
predict_train = model.predict(train_x)

In [11]:
print(predict_train)

[ 803.88817641 1733.98835979 3294.52154482 ...  811.16967914 2343.96927185
 2444.98869913]


In [12]:
rmse_train = mean_squared_error(train_y, predict_train) ** (0.5)

In [13]:
print('RMSE: ', rmse_train)

RMSE:  1135.8159344155245


In [None]:
predict_test = model.predict(test_x)
print(predict_test) 

rmse_test = mean_squared_error(test_y, predict_test) ** (0.5)
print('RMSE: ', rmse_test)

### Logistic Regression

In [15]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [16]:
train_data = pd.read_csv('logistic-regression/train.csv')
test_data = pd.read_csv('logistic-regression/test.csv')

In [None]:
print('Shape of training data :',train_data.shape)
print('Shape of testing data :',test_data.shape)

In [17]:
train_x = train_data.drop(columns=['Survived'],axis=1)
train_y = train_data['Survived']

In [18]:
test_x = test_data.drop(columns=['Survived'],axis=1)
test_y = test_data['Survived']

In [19]:
model = LogisticRegression()

In [20]:
model.fit(train_x, train_y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [22]:
print('Coefficient of model :', model.coef_)
print('Intercept of model',model.intercept_)

Coefficient of model : [[-0.03113323  0.00155964  0.93304392  0.08455535 -1.02568371  1.24523329
  -1.25331773  1.05043736  0.97901803  0.61562022 -1.14085062 -0.78092159
  -0.28356303 -0.44782482  0.16176307  0.63396254 -0.04708675  0.20461577
  -0.4576688  -0.33678327 -0.16688699  0.07954253  0.28569839 -0.37332536]]
Intercept of model [0.07223992]


In [None]:
predict_train = model.predict(train_x)
print('Target on train data',predict_train) 

In [24]:
accuracy_train = accuracy_score(train_y,predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

accuracy_score on train dataset :  0.8047752808988764


In [25]:
predict_test = model.predict(test_x)
print('Target on test data',predict_test) 

Target on test data [0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 0 1 0 1 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 1 0 0 0 0 1 1 0 1 1 0 1 0 0 0 0 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1
 0 1 0 0 0 0 1 1 1 1 0 1 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 1]


In [27]:
accuracy_test = accuracy_score(test_y,predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

accuracy_score on test dataset :  0.8324022346368715


### Decision Tree

In [28]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [29]:
train_data = pd.read_csv('decision-tree/train.csv')
test_data = pd.read_csv('decision-tree/test.csv')

In [None]:
print('Shape of training data :',train_data.shape)
print('Shape of testing data :',test_data.shape)

In [31]:
train_x = train_data.drop(columns=['Survived'],axis=1)
train_y = train_data['Survived']

test_x = test_data.drop(columns=['Survived'],axis=1)
test_y = test_data['Survived']

In [32]:
model = DecisionTreeClassifier()

In [33]:
model.fit(train_x, train_y)

DecisionTreeClassifier()

In [34]:
print('Depth of the Decision Tree :', model.get_depth())

Depth of the Decision Tree : 19


In [None]:
predict_train = model.predict(train_x)
print('Target on train data', predict_train) 

In [36]:
accuracy_train = accuracy_score(train_y, predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

accuracy_score on train dataset :  0.9859550561797753


In [37]:
predict_test = model.predict(test_x)

In [38]:
accuracy_test = accuracy_score(test_y,predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

accuracy_score on test dataset :  0.7877094972067039


### Support Vector Machine

In [39]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [41]:
train_data = pd.read_csv('svm/train.csv')
test_data = pd.read_csv('svm/test.csv')

In [None]:
print('Shape of training data :',train_data.shape)
print('Shape of testing data :',test_data.shape)

In [42]:
train_x = train_data.drop(columns=['Survived'],axis=1)
train_y = train_data['Survived']

test_x = test_data.drop(columns=['Survived'],axis=1)
test_y = test_data['Survived']

In [43]:
model = SVC()

In [44]:
model.fit(train_x, train_y)

SVC()

In [47]:
predict_train = model.predict(train_x)
accuracy_train = accuracy_score(train_y,predict_train)
print(accuracy_train)

0.651685393258427


In [48]:
predict_test = model.predict(test_x)
accuracy_test = accuracy_score(test_y,predict_test)
print(accuracy_test)

0.7262569832402235


### Naive Bayes

In [49]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [50]:
train_data = pd.read_csv('naive-bayes/train.csv')
test_data = pd.read_csv('naive-bayes/test.csv')

In [51]:
print('Shape of training data :',train_data.shape)
print('Shape of testing data :',test_data.shape)

Shape of training data : (712, 25)
Shape of testing data : (179, 25)


In [52]:
train_x = train_data.drop(columns=['Survived'],axis=1)
train_y = train_data['Survived']

test_x = test_data.drop(columns=['Survived'],axis=1)
test_y = test_data['Survived']

In [53]:
model = GaussianNB()

In [54]:
model.fit(train_x, train_y)

GaussianNB()

In [55]:
predict_train = model.predict(train_x)
accuracy_train = accuracy_score(train_y,predict_train)
print(accuracy_train)

0.44803370786516855


In [56]:
predict_test = model.predict(test_x)
accuracy_test = accuracy_score(test_y,predict_test)
print(accuracy_test)

0.35195530726256985
