In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.multiclass import OneVsRestClassifier

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("train.csv")

In [None]:
# Preview the first 50 rows of the loaded frame.
data.head(50)

In [None]:
# (number of rows, number of columns) of the loaded frame.
data.shape

In [None]:
# Per-column dtype and non-null count summary.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# True if at least one row is an exact duplicate of an earlier row.
data.duplicated().any()

In [None]:
# Number of rows per price_range value (the column later used as the target).
data['price_range'].value_counts()

In [None]:
# Count plots for six discrete features on a 2x3 grid.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('Price Range vs all numerical factor')
count_columns = ['three_g', 'touch_screen', 'four_g', 'wifi', 'fc', 'dual_sim']
# axes.flat walks the grid row by row, so each column lands on the same panel as before.
for panel, column in zip(axes.flat, count_columns):
    sns.countplot(data=data, x=column, palette='GnBu_r', ax=panel)
plt.show()

In [None]:
# Box plots of six continuous features grouped by price_range, on a 3x2 grid.
fig, axes = plt.subplots(3, 2, figsize=(18, 10))
fig.suptitle('Price Range vs all numerical factor')
box_columns = ['battery_power', 'ram', 'px_width', 'px_height', 'mobile_wt', 'int_memory']
# axes.flat iterates row by row, which reproduces the original panel assignment.
for panel, column in zip(axes.flat, box_columns):
    sns.boxplot(data=data, x='price_range', y=column, palette="GnBu_r", ax=panel)
plt.show()

In [None]:
# Separate the target from the predictors.
y = data['price_range']                # dependent variable
x = data.drop(columns='price_range')   # independent variables: every remaining column

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Score every predictor against the target with the chi-squared statistic.
ordered_rank_features = SelectKBest(k=7, score_func=chi2)
ordered_feature = ordered_rank_features.fit(x, y)
# Put column names and scores side by side in a two-column table.
dfcolumns = pd.DataFrame(x.columns)
dfscores = pd.DataFrame(ordered_feature.scores_, columns=["Score"])
features_rank = pd.concat([dfcolumns, dfscores], axis=1)
features_rank.columns = ['Features', 'Score']
features_rank

In [None]:
# The ten features with the highest chi-squared score.
features_rank.nlargest(10,'Score')

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
# Extra-trees importances are randomised; fix the seed (42, as used by the other
# estimators in this notebook) so the ranking is the same on every
# Restart & Run All.
model = ExtraTreesClassifier(random_state=42)
model.fit(x, y)

In [None]:
# Raw importance value of each feature from the fitted extra-trees model.
print(model.feature_importances_)

In [None]:
# Label each importance with its column name and chart the ten largest.
ranked_features = pd.Series(data=model.feature_importances_, index=x.columns)
top_ten = ranked_features.nlargest(10)
top_ten.plot.barh()
plt.show()

In [None]:
from sklearn.feature_selection import mutual_info_classif
# mutual_info_classif adds random noise to continuous features internally;
# seeding it makes the estimated scores reproducible between runs.
mutual_info = mutual_info_classif(x, y, random_state=42)
mutual_data = pd.Series(mutual_info, index=x.columns)
# Highest mutual information first.
mutual_data.sort_values(ascending=False)

In [None]:
# Columns removed before modelling (presumably chosen from the rankings above — confirm).
columns_to_drop = [
    'wifi', 'touch_screen', 'three_g', 'talk_time', 'sc_w', 'dual_sim', 'four_g',
    'int_memory', 'blue', 'n_cores', 'mobile_wt', 'm_dep', 'fc',
]
# NOTE(review): this rebinds `data`, so running the cell twice raises KeyError.
data = data.drop(columns=columns_to_drop)

In [None]:
# Preview the frame after the column drop.
data.head()

In [None]:
# Rebuild target and predictors from the reduced frame.
y = data['price_range']
x = data.drop(columns='price_range')

In [None]:
# Shapes of the predictor matrix and the target, in that order.
for part in (x, y):
    print(part.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.25,
)

In [None]:
# Shapes of the four split parts, printed in the original order.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics on the training split only, then apply them to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Inspect the training features; X_train was reassigned to the scaler's output in the scaling cell.
X_train

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression on the scaled training split.
logreg = LogisticRegression(random_state=42)
logreg.fit(X=X_train, y=y_train)

In [None]:
# Class probabilities and hard predictions from the logistic-regression model.
lr_probs = logreg.predict_proba(X_test)
y_pred = logreg.predict(X_test)

In [None]:
# Show the predicted labels; when run top to bottom these come from the logistic regression.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score
# Test accuracy of the logistic regression, kept for the final model comparison.
acc1 = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression

In [None]:
from sklearn.svm import SVC
# Linear-kernel SVM; probability=True enables predict_proba, which is used in the next cell.
svc = SVC(kernel='linear', probability=True, random_state=42)
svc.fit(X=X_train, y=y_train)

In [None]:
# Class probabilities and hard predictions from the SVM.
svc_probs = svc.predict_proba(X_test)
y_pred = svc.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Test accuracy of the SVM, kept for the final model comparison.
acc2 = accuracy_score(y_true=y_test, y_pred=y_pred)
# Confusion matrix of true vs. predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [None]:
# Classification report for the current y_pred (the SVM predictions when run top to bottom).
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# 5-nearest-neighbours classifier; Minkowski metric with p=2.
KN = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
KN.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import  accuracy_score
# Predict with KNN *before* scoring. Previously acc3 was computed first, so it
# measured the stale y_pred left over from the SVM cell.
y_pred = KN.predict(X_test)
KN_probs = KN.predict_proba(X_test)
# Test accuracy of KNN, kept for the final model comparison.
acc3 = accuracy_score(y_test, y_pred)

In [None]:
# Classification report for the current y_pred (the KNN predictions when run top to bottom).
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn import metrics
# Random forest limited to depth-2 trees, seeded for repeatability.
RF = RandomForestClassifier(random_state=42, max_depth=2)
RF.fit(X=X_train, y=y_train)

In [None]:
# Predict with the random forest before scoring. Without this line y_pred still
# holds the KNN predictions, so acc6 would not measure the random forest.
y_pred = RF.predict(X_test)
RF_probs = RF.predict_proba(X_test)
# Test accuracy of the random forest, kept for the final model comparison.
acc6 = accuracy_score(y_test, y_pred)

In [None]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
# Classification report for whatever y_pred currently holds.
# NOTE(review): this sits in the random-forest section — confirm y_pred was produced by RF.predict.
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes on the scaled training split.
NB = GaussianNB()
NB.fit(X=X_train, y=y_train)

In [None]:
# Class probabilities and hard predictions from naive Bayes.
NB_probs = NB.predict_proba(X_test)
y_pred = NB.predict(X_test)

In [None]:
from sklearn.metrics import  accuracy_score
# Test accuracy of naive Bayes, kept for the final model comparison.
acc4 = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Classification report for the current y_pred (the naive Bayes predictions when run top to bottom).
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree split on entropy, seeded for repeatability.
DT = DecisionTreeClassifier(random_state=42, criterion='entropy')
DT.fit(X=X_train, y=y_train)

In [None]:
# Class probabilities and hard predictions from the decision tree.
DT_probs = DT.predict_proba(X_test)
y_pred = DT.predict(X_test)

In [None]:
from sklearn.metrics import  accuracy_score
# Test accuracy of the decision tree, kept for the final model comparison.
acc5 = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Classification report for the current y_pred (the decision-tree predictions when run top to bottom).
print(classification_report(y_test, y_pred))

In [None]:
# Accuracies and their display names, index-aligned for the comparison chart.
mylist = [acc1, acc2, acc3, acc4, acc5, acc6]
mylist2 = [
    "Logistic Regression",
    "SVM",
    "KNN",
    "Naive Bayes",
    "DTR",
    "Random Forest",
]

In [None]:
# Bar chart comparing the test accuracy of every model.
plt.rcParams['figure.figsize']=8,6
sns.set_style("darkgrid")
ax = sns.barplot(x=mylist2, y=mylist, palette = "rocket", saturation =1.5)
plt.xlabel("Classification Models", fontsize = 20 )
plt.ylabel("Accuracy", fontsize = 20)
plt.title("Accuracy of different Classification Models", fontsize = 20)
plt.xticks(fontsize = 11, horizontalalignment = 'center', rotation = 8)
plt.yticks(fontsize = 13)
# Write each bar's height as a percentage just above the bar.
for p in ax.patches:
    width, height = p.get_width(), p.get_height()
    # Use dedicated names for the bar origin: unpacking into `x, y` would
    # overwrite the notebook-level feature matrix `x` and target `y`.
    bar_x, bar_y = p.get_xy()
    ax.annotate(f'{height:.2%}', (bar_x + width/2, bar_y + height*1.02), ha='center', fontsize = 'x-large')
plt.show()

In [None]:
# The target is multiclass, and roc_auc_score(..., multi_class='ovr') requires the
# full (n_samples, n_classes) matrix returned by predict_proba. Keeping only
# column 1 (a binary-classification idiom) makes the AUC computation fail, and
# the in-place slicing also raised IndexError when this cell was run twice.
# The matrices are therefore left untouched and only validated here.
for probs_name, probs in (
    ('DT_probs', DT_probs),
    ('NB_probs', NB_probs),
    ('RF_probs', RF_probs),
    ('KN_probs', KN_probs),
    ('svc_probs', svc_probs),
    ('lr_probs', lr_probs),
):
    if probs.ndim != 2 or probs.shape[0] != len(y_test):
        raise ValueError(
            f'{probs_name} must be a (n_samples, n_classes) predict_proba matrix, '
            f'got shape {probs.shape}'
        )

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

In [None]:
# One-vs-rest ROC AUC for every model, evaluated in the original order.
DT_auc, NB_auc, RF_auc, KN_auc, svc_auc, lr_auc = (
    roc_auc_score(y_test, class_probs, multi_class='ovr')
    for class_probs in (DT_probs, NB_probs, RF_probs, KN_probs, svc_probs, lr_probs)
)

In [None]:
pip install -U yellowbrick

In [None]:
from yellowbrick.classifier import ROCAUC
from sklearn.linear_model import RidgeClassifier
# Per-class ROC curves for a liblinear logistic regression.
# NOTE(review): `model` rebinds the name used earlier for the extra-trees classifier.
class_labels = [str(label) for label in range(4)]
model = LogisticRegression(solver="liblinear", multi_class="auto")
visualizer = ROCAUC(model, classes=class_labels)
visualizer.fit(X_train, y_train)   # train the wrapped estimator
visualizer.score(X_test, y_test)   # compute the curves on the test split
visualizer.show()

In [None]:
# Per-class ROC curves for the linear SVM.
model2 = SVC(kernel = 'linear', random_state = 42, probability=True)
# Wrap model2 (the SVM). Passing `model` here re-plotted the logistic
# regression from the previous cell and left model2 unused.
visualizer = ROCAUC(model2, classes=['0','1','2','3'])
visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
visualizer.show()