# Fine Tuning(Logistic Regression)

In [89]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 

In [90]:
df = pd.read_csv("covid_toy.csv")
df.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [91]:
si = SimpleImputer()
df['fever'] = si.fit_transform(df[['fever']])
df.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

In [92]:
x = df.drop(columns= ['has_covid'])
y = df['has_covid']
x

Unnamed: 0,age,gender,fever,cough,city
0,60,Male,103.0,Mild,Kolkata
1,27,Male,100.0,Mild,Delhi
2,42,Male,101.0,Mild,Delhi
3,31,Female,98.0,Mild,Kolkata
4,65,Female,101.0,Mild,Mumbai
...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore
96,51,Female,101.0,Strong,Kolkata
97,20,Female,101.0,Mild,Bangalore
98,5,Female,98.0,Strong,Mumbai


In [93]:
lb = LabelEncoder()
x = pd.get_dummies(x,columns=['gender','cough','city'])
y = lb.fit_transform(y)
x = x.astype(int)



In [94]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=42)

In [95]:
from sklearn.linear_model import LogisticRegression

In [96]:
lr = LogisticRegression(max_iter = 1000)
lr.fit(x_train,y_train)
y_pred = lr.predict(x_test)

In [97]:
from sklearn.metrics import accuracy_score, confusion_matrix
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy score: {accuracy*100:.2f}%")

confusion = confusion_matrix(y_test,y_pred)
print(f"Confusion Matrix:")
print(confusion)

Accuracy score: 45.00%
Confusion Matrix:
[[4 9]
 [2 5]]


In [98]:
param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet', None],   # all valid options
    'C': [0.01, 0.1, 1, 10],                       # regularization strength
    'solver': ['liblinear', 'saga'],               # solvers that support l1/l2/elasticnet
    'l1_ratio': [0.0, 0.5, 1.0]                    # only used if penalty='elasticnet'
}

# Grid Search CV

In [100]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
clf = GridSearchCV(lr,param_grid = param_grid, cv = 4, verbose = True, n_jobs = -1)
clf

In [101]:
clf.fit(x_train,y_train)
y_pred = clf.predict(x_test)
print(f"Best Parameters: {clf.best_params_}")
print(f"Best CV Score: {clf.best_score_*100:.2f}%")
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy score: {accuracy*100:.2f}%")

Fitting 4 folds for each of 96 candidates, totalling 384 fits
Best Parameters: {'C': 0.01, 'l1_ratio': 0.0, 'penalty': 'l1', 'solver': 'liblinear'}
Best CV Score: 52.50%
Accuracy score: 65.00%


# Randomized Search CV

In [103]:
from sklearn.model_selection import RandomizedSearchCV
random_serach = RandomizedSearchCV(lr,param_distributions= param_grid, cv = 4, verbose=True , n_jobs= -1)
random_serach.fit(x_train,y_train)
print(f"Best Parameters: {clf.best_params_}")
print(f"Best CV Score: {clf.best_score_*100:.2f}%")
accuracy_r = accuracy_score(y_test,y_pred)
print(f"Accuracy(Random_Search): {accuracy_r*100:.2f}%")

Fitting 4 folds for each of 10 candidates, totalling 40 fits
Best Parameters: {'C': 0.01, 'l1_ratio': 0.0, 'penalty': 'l1', 'solver': 'liblinear'}
Best CV Score: 52.50%
Accuracy(Random_Search): 65.00%


# Userbehavior

In [105]:
df = pd.read_csv("userbehaviour.csv")
df

Unnamed: 0,userid,Average Screen Time,Average Spent on App (INR),Left Review,Ratings,New Password Request,Last Visited Minutes,Status
0,1001,17.0,634.0,1,9,7,2990,Installed
1,1002,0.0,54.0,0,4,8,24008,Uninstalled
2,1003,37.0,207.0,0,8,5,971,Installed
3,1004,32.0,445.0,1,6,2,799,Installed
4,1005,45.0,427.0,1,5,6,3668,Installed
...,...,...,...,...,...,...,...,...
994,1995,38.0,938.0,0,5,4,1865,Installed
995,1996,43.0,61.0,0,6,8,1327,Installed
996,1997,47.0,761.0,0,6,1,1936,Installed
997,1998,6.0,73.0,1,5,1,3436,Installed


In [106]:
x = df.drop(columns=['Status'])
y = df['Status']
y = lb.fit_transform(y)

In [107]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2, random_state = 42)
lr.fit(x_train,y_train)
y_pred = lr.predict(x_test)
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy: {accuracy*100:.2f}")

Accuracy: 100.00
