# Implementation OF Logistic Regression on PlayTennis

In [89]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [17]:
df = pd.read_csv('PlayTennis.csv')
df

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [91]:
df.dtypes

Outlook        object
Temperature    object
Humidity       object
Wind           object
Play Tennis    object
dtype: object

In [92]:
df.describe

<bound method NDFrame.describe of      Outlook Temperature Humidity    Wind Play Tennis
0      Sunny         Hot     High    Weak          No
1      Sunny         Hot     High  Strong          No
2   Overcast         Hot     High    Weak         Yes
3       Rain        Mild     High    Weak         Yes
4       Rain        Cool   Normal    Weak         Yes
5       Rain        Cool   Normal  Strong          No
6   Overcast        Cool   Normal  Strong         Yes
7      Sunny        Mild     High    Weak          No
8      Sunny        Cool   Normal    Weak         Yes
9       Rain        Mild   Normal    Weak         Yes
10     Sunny        Mild   Normal  Strong         Yes
11  Overcast        Mild     High  Strong         Yes
12  Overcast         Hot   Normal    Weak         Yes
13      Rain        Mild     High  Strong          No>

In [93]:
df.isnull().sum()

Outlook        0
Temperature    0
Humidity       0
Wind           0
Play Tennis    0
dtype: int64

In [94]:
df.shape

(14, 5)

In [95]:
df['Play Tennis'].value_counts()

Play Tennis
Yes    9
No     5
Name: count, dtype: int64

# Encoding Of Data

# Approach 1 : Using Label Encoder

In [96]:
le = LabelEncoder()
df_encoded = pd.DataFrame()

columns_to_encode = ['Outlook','Temperature','Humidity','Wind','Play Tennis']
for cols in columns_to_encode:
    df_encoded[cols] = le.fit_transform(df[cols])
df_encoded

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1
5,1,0,1,0,0
6,0,0,1,0,1
7,2,2,0,1,0
8,2,0,1,1,1
9,1,2,1,1,1


In [97]:
X = df_encoded.drop(['Play Tennis'],axis = 1)
y = df_encoded['Play Tennis']


### Model 1 : Using Label Encoding 

In [18]:
lr1 = LogisticRegression(class_weight='balanced')
lr1.fit(X,y)


In [99]:
lr1.score(X,y)

0.7857142857142857

In [100]:
y_pred =lr1.predict(X)

In [101]:
print(confusion_matrix(y,y_pred))
print(accuracy_score(y,y_pred))
print(classification_report(y,y_pred))

[[4 1]
 [2 7]]
0.7857142857142857
              precision    recall  f1-score   support

           0       0.67      0.80      0.73         5
           1       0.88      0.78      0.82         9

    accuracy                           0.79        14
   macro avg       0.77      0.79      0.78        14
weighted avg       0.80      0.79      0.79        14



### Model 2 : Using One Hot Encoder

In [19]:
    enc = OneHotEncoder(sparse = False)
    feature_list = ['Outlook','Temperature','Humidity','Wind']
    one_hot_encoded = enc.fit_transform(df[feature_list])
    df_encoded2 = pd.DataFrame(one_hot_encoded, columns = enc.get_feature_names_out(feature_list))
    df_encoded2

TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [103]:
X = df_encoded2
y = df['Play Tennis']
lr2 = LogisticRegression(class_weight='balanced')
lr2.fit(X,y)


In [104]:
lr2.score(X2,y2)

0.8571428571428571

In [105]:
y_pred =lr2.predict(X)

In [106]:
print(confusion_matrix(y,y_pred))
print(accuracy_score(y,y_pred))
print(classification_report(y,y_pred))

[[4 1]
 [1 8]]
0.8571428571428571
              precision    recall  f1-score   support

          No       0.80      0.80      0.80         5
         Yes       0.89      0.89      0.89         9

    accuracy                           0.86        14
   macro avg       0.84      0.84      0.84        14
weighted avg       0.86      0.86      0.86        14

