In [1]:
#https://onezero.blog/modelling-binary-logistic-regression-using-python-research-oriented-modelling-and-interpretation/

In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from statsmodels.formula.api import logit
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv(r'C:\Users\WINDOWS\OneDrive\Desktop\PANDAS PRACTICE\Pokemon.csv')

In [4]:
data = pd.read_csv('LogisticRegression.csv',dtype ={'Legendary':object})

In [5]:
data.drop('Unnamed: 0', axis = 1 , inplace = True)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Speed      800 non-null    int64 
 1   Defense    800 non-null    int64 
 2   Attack     800 non-null    int64 
 3   Legendary  800 non-null    object
dtypes: int64(3), object(1)
memory usage: 25.1+ KB


In [7]:
data['Legendary'] = data['Legendary'].map({"True":0,"False":1})

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Speed      800 non-null    int64
 1   Defense    800 non-null    int64
 2   Attack     800 non-null    int64
 3   Legendary  800 non-null    int64
dtypes: int64(4)
memory usage: 25.1 KB


In [9]:
#Split
train_data , test_data  = train_test_split(data,test_size = .20 , random_state = 42)

In [10]:
formula = ('Legendary ~ Attack + Defense + Speed')

In [11]:
data.head()

Unnamed: 0,Speed,Defense,Attack,Legendary
0,45,49,49,1
1,60,63,62,1
2,80,83,82,1
3,80,123,100,1
4,65,43,52,1


In [12]:
#Create Model
model = logit(formula = formula , data = data).fit()

Optimization terminated successfully.
         Current function value: 0.177471
         Iterations 9


In [13]:
#Model Summary
model.summary()

0,1,2,3
Dep. Variable:,Legendary,No. Observations:,800.0
Model:,Logit,Df Residuals:,796.0
Method:,MLE,Df Model:,3.0
Date:,"Fri, 16 Jul 2021",Pseudo R-squ.:,0.3702
Time:,19:21:36,Log-Likelihood:,-141.98
converged:,True,LL-Null:,-225.45
Covariance Type:,nonrobust,LLR p-value:,5.8100000000000006e-36

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,11.5219,1.111,10.372,0.000,9.345,13.699
Attack,-0.0213,0.005,-4.174,0.000,-0.031,-0.011
Defense,-0.0328,0.006,-5.810,0.000,-0.044,-0.022
Speed,-0.0492,0.007,-6.940,0.000,-0.063,-0.035


In [14]:
#ODDs Ratio
print(np.exp(model.params))

Intercept    100902.279723
Attack            0.978960
Defense           0.967688
Speed             0.951980
dtype: float64


In [15]:
#Marginal Effects Computation
AME = model.get_margeff(at = 'Overall' , method ='dydx')

In [16]:
print("Average Marginal Effects")
print(AME.summary())

Average Marginal Effects
        Logit Marginal Effects       
Dep. Variable:              Legendary
Method:                          dydx
At:                           overall
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Attack        -0.0011      0.000     -4.384      0.000      -0.002      -0.001
Defense       -0.0017      0.000     -6.125      0.000      -0.002      -0.001
Speed         -0.0025      0.000     -7.417      0.000      -0.003      -0.002


In [17]:
#Confusion Matrix
import numpy as np
from sklearn.metrics import classification_report,accuracy_score

In [18]:
#Compute Prediction
prediction = model.predict(exog = test_data)
#Define Cutoff
cutoff = 0.5
#Compute Class Predictions: y_predictions
y_prediction = np.where(prediction > cutoff , 1,0)
#Assign actual class labels from the test sample to y_actual
y_actual = test_data["Legendary"]
#Compute and Print confusion matrix using crosstab function
conf_matrix = pd.crosstab(y_actual , y_prediction,
                         rownames =['Actual'],
                         colnames =['Predicted'],
                         margins = True)

In [19]:
print(conf_matrix)

Predicted  0    1  All
Actual                
0          3    7   10
1          4  146  150
All        7  153  160


In [20]:
#Classification Accuracy
accuracy = accuracy_score(y_actual,y_prediction)

In [21]:
print('accuracy=%.3f' % accuracy + "%")
print(accuracy * 100)

accuracy=0.931%
93.125


In [22]:
#Classification Report
print(classification_report(y_actual,y_prediction))

              precision    recall  f1-score   support

           0       0.43      0.30      0.35        10
           1       0.95      0.97      0.96       150

    accuracy                           0.93       160
   macro avg       0.69      0.64      0.66       160
weighted avg       0.92      0.93      0.93       160



In [3]:
print('done')

done
