## Classification Project

### Importing models and dataset

In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
df1 = pd.read_csv("state-wise-women2.csv") 
df1.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 12 columns):
 #   Column                                      Non-Null Count  Dtype 
---  ------                                      --------------  ----- 
 0   State                                       37 non-null     object
 1   Cases pending trial from the previous year  37 non-null     int64 
 2   Cases sent for trial during the year        37 non-null     int64 
 3   Total cases for
trial during the year       37 non-null     int64 
 4   Cases withdrawn by the Govt                 37 non-null     int64 
 5   Cases disposed off by Plea Bargaining       37 non-null     int64 
 6   Cases compounded or withdrawn               37 non-null     int64 
 7   Cases in which trials were completed        37 non-null     int64 
 8   Cases convicted                             37 non-null     int64 
 9   Cases Acquitted or Discharged               37 non-null     int64 
 10  Total cases disposed off by 

In [3]:
x = df1.iloc[:-1, 1:-1].values
y = df1.iloc[:-1, -1].values 

In [4]:
x[:5] 

array([[2155,  937, 3092,    0,    0,   15,  631,   79,  552,  646],
       [ 706,   63,  769,    0,    0,    2,   24,   14,   10,   26],
       [7188, 1276, 8464,    0,    0,    0,  636,  103,  533,  636],
       [4651, 1157, 5808,    0,    0,    0,  390,  146,  244,  390],
       [2910, 1608, 4518,    0,    0,    0, 1518,  453, 1065, 1518]],
      dtype=int64)

### No need for Encoding and also it has no missing Data

### Spliting the Data into training set and test set

In [5]:
from sklearn.model_selection import train_test_split 
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

In [6]:
xtrain[:5]

array([[3053, 1311, 4364,    0,    0,    0,  695,   91,  604,  695],
       [ 127,   23,  150,    0,    0,    0,    7,    0,    7,    7],
       [ 183,  263,  446,    0,    0,    0,  114,   61,   53,  114],
       [  20,    4,   24,    0,    0,    0,    2,    1,    1,    2],
       [5011, 2109, 7120,    0,    0,    0,  586,  156,  430,  586]],
      dtype=int64)

### Feaure Scaling

In [7]:
from sklearn.preprocessing import StandardScaler 
sc = StandardScaler() 
xtrain = sc.fit_transform(xtrain) 
xtest = sc.transform(xtest) 

In [8]:
xtrain[:5] 

array([[-0.15879382,  0.17767275, -0.09331669,  0.        , -0.19245009,
        -0.48709524,  0.13062339, -0.25467279,  0.28114066,  0.11830934],
       [-0.75997961, -0.84819821, -0.80857089,  0.        , -0.19245009,
        -0.48709524, -0.75447295, -0.65107247, -0.76786542, -0.75726161],
       [-0.74847366, -0.65704213, -0.75832997,  0.        , -0.19245009,
        -0.48709524, -0.61681989, -0.385354  , -0.68703748, -0.62108997],
       [-0.78196419, -0.8633314 , -0.82995723,  0.        , -0.19245009,
        -0.48709524, -0.76090534, -0.64671643, -0.77840819, -0.76362477],
       [ 0.24350343,  0.81326671,  0.37446702,  0.        , -0.19245009,
        -0.48709524, -0.00960263,  0.02846984, -0.02459981, -0.02040757]])

### Training various ML Models over same training model.

In [9]:
from sklearn.linear_model import LogisticRegression 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.svm import SVC 
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier

lrc = LogisticRegression(random_state=0)
lrc.fit(xtrain, ytrain)
knnc = KNeighborsClassifier() 
knnc.fit(xtrain, ytrain) 
dtc = DecisionTreeClassifier() 
dtc.fit(xtrain, ytrain) 
svc = SVC(kernel="rbf", random_state=0) 
svc.fit(xtrain, ytrain) 
nbc = GaussianNB() 
nbc.fit(xtrain, ytrain) 
rfc = RandomForestClassifier(random_state=0) 
rfc.fit(xtrain, ytrain) 

RandomForestClassifier(random_state=0)

### Predicting Test Results and Observing Confusion Matrix

In [10]:
from sklearn.metrics import confusion_matrix

# Logestic Regressor 
lrpred = lrc.predict(xtest) 
confusion_matrix(ytest, lrpred) 

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)

In [11]:
# K-NearestNeighbour 
knnpred = knnc.predict(xtest) 
confusion_matrix(ytest, knnpred) 

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]], dtype=int64)

In [12]:
# Decision Tree Classifier 
dtpred = dtc.predict(xtest) 
confusion_matrix(ytest, dtpred) 

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]], dtype=int64)

In [13]:
# Support Vector Machines 
svmpred = svc.predict(xtest)
confusion_matrix(ytest, svmpred) 

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)

In [14]:
# Naive Bayes 
nbpred = nbc.predict(xtest) 
confusion_matrix(ytest, nbpred) 

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int64)

In [15]:
# Random Forest 
rfpred = rfc.predict(xtest) 
confusion_matrix(ytest, rfpred) 

array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]], dtype=int64)

### Evaluating Models

In [16]:
from sklearn.metrics import accuracy_score 

# Logestic Regressor 
accuracy_score(ytest, lrpred)

0.0

In [17]:
# K-NearestNeighbour 
accuracy_score(ytest, knnpred) 

0.0

In [18]:
# Decision Tree Classifier 
accuracy_score(ytest, dtpred) 

0.0

In [19]:
# Support Vector Machines 
accuracy_score(ytest, svmpred) 

0.0

In [20]:
# Naive Bayes 
accuracy_score(ytest, nbpred) 

0.0

In [21]:
# Random Forest 
accuracy_score(ytest, rfpred) 

0.0

In [22]:
from sklearn.metrics import balanced_accuracy_score 

# Logestic Regressor 
balanced_accuracy_score(ytest, lrpred) 

0.0

In [23]:
# K-NearestNeighbour 
balanced_accuracy_score(ytest, knnpred) 

0.0

In [24]:
# Decision Tree Classifier 
balanced_accuracy_score(ytest, dtpred) 

0.0

In [25]:
# Support Vector Machines 
balanced_accuracy_score(ytest, svmpred) 

0.0

In [26]:
# Naive Bayes 
balanced_accuracy_score(ytest, nbpred) 

0.0

In [27]:
# Random Forest 
balanced_accuracy_score(ytest, rfpred) 

0.0

## End of the Project