# Predicting Whether Yoga Was Performed from Daily Screen-Time Data

In [387]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## 1) Importing the Dataset

In [388]:
# Load the screen-time log; the path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv("./Screen Time Data.csv")

In [389]:
# Preview the first rows to sanity-check that the file was parsed as expected.
df.head()

Unnamed: 0,index,Date,Week Day,Total Screen Time,Social Networking,Reading and Reference,Other,Productivity,Health and Fitness,Entertainment,Creativity,Yoga
0,0,04/17/19,Wednesday,187,89,17,41,22,0,0,0,0
1,1,04/18/19,Thursday,123,78,17,8,9,0,0,0,0
2,2,04/19/19,Friday,112,52,40,8,4,0,3,0,0
3,3,04/20/19,Saturday,101,69,9,38,2,0,3,0,0
4,4,04/21/19,Sunday,56,35,2,43,3,0,1,1,0


In [390]:
# Summarise row count, per-column non-null counts and dtypes in one view.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   index                  28 non-null     int64 
 1   Date                   28 non-null     object
 2   Week Day               28 non-null     object
 3   Total Screen Time      28 non-null     int64 
 4   Social Networking      28 non-null     int64 
 5   Reading and Reference  28 non-null     int64 
 6   Other                  28 non-null     int64 
 7   Productivity           28 non-null     int64 
 8   Health and Fitness     28 non-null     int64 
 9   Entertainment          28 non-null     int64 
 10  Creativity             28 non-null     int64 
 11  Yoga                   28 non-null     int64 
dtypes: int64(10), object(2)
memory usage: 2.8+ KB


### Column Datatypes

In [391]:
# Date and Week Day are still object (string) columns at this point and need
# encoding before they can be fed to a model.
df.dtypes

index                     int64
Date                     object
Week Day                 object
Total Screen Time         int64
Social Networking         int64
Reading and Reference     int64
Other                     int64
Productivity              int64
Health and Fitness        int64
Entertainment             int64
Creativity                int64
Yoga                      int64
dtype: object

### Checking for null values

In [392]:
# Count missing entries per column (isna is the canonical name for isnull).
df.isna().sum()

index                    0
Date                     0
Week Day                 0
Total Screen Time        0
Social Networking        0
Reading and Reference    0
Other                    0
Productivity             0
Health and Fitness       0
Entertainment            0
Creativity               0
Yoga                     0
dtype: int64

## 2) Data Preprocessing

### Dummy Variables

In [393]:
# One-hot encode the weekday. drop_first=True omits the first category
# (Friday here), which then acts as the all-zeros baseline row.
# The dtype is pinned because the pandas default changed from uint8 to bool in
# 2.0; pinning keeps the 0/1 columns displayed below identical across versions.
weekday = pd.get_dummies(df["Week Day"], drop_first=True, dtype="uint8")
weekday.head()

Unnamed: 0,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,0,0,0,0,0,1
1,0,0,0,1,0,0
2,0,0,0,0,0,0
3,0,1,0,0,0,0
4,0,0,1,0,0,0


In [394]:
# Swap the raw "Week Day" text column for its dummy columns in a single step:
# the text column is removed first, then the indicator columns are appended
# on the right, aligned on the shared row index.
df = pd.concat([df.drop(columns=["Week Day"]), weekday], axis=1)

### Removing index column

In [395]:
# The CSV carries its own "index" column, which only repeats the row number
# and has no predictive value.
df = df.drop(columns=["index"])

In [396]:
# Confirm the weekday dummies were appended and "Week Day"/"index" are gone.
df.head()

Unnamed: 0,Date,Total Screen Time,Social Networking,Reading and Reference,Other,Productivity,Health and Fitness,Entertainment,Creativity,Yoga,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,04/17/19,187,89,17,41,22,0,0,0,0,0,0,0,0,0,1
1,04/18/19,123,78,17,8,9,0,0,0,0,0,0,0,1,0,0
2,04/19/19,112,52,40,8,4,0,3,0,0,0,0,0,0,0,0
3,04/20/19,101,69,9,38,2,0,3,0,0,0,1,0,0,0,0
4,04/21/19,56,35,2,43,3,0,1,1,0,0,0,1,0,0,0


### Splitting Date into Day, Month, and Year (optional: `mode == 1` enables it, to check whether these features are useful)

In [397]:
# Feature-engineering switch, fixed in code so that "Restart Kernel -> Run All"
# reproduces the recorded outputs without waiting for keyboard input.
#   1         -> split Date into Day / Month / Year columns
#   otherwise -> drop Date without deriving anything from it
mode = 1

In [398]:
# Dates are stored as "MM/DD/YY" strings. In mode 1 the three numeric parts are
# kept as separate integer features; in every mode the raw Date column is
# removed afterwards, since a string column cannot be fed to the model.
if mode == 1:
    df = df.assign(
        Day=df["Date"].str.split("/").str[1].astype("int64"),
        Month=df["Date"].str.split("/").str[0].astype("int64"),
        Year=df["Date"].str.split("/").str[2].astype("int64"),
    )

df = df.drop(columns=["Date"])

df.head()

Unnamed: 0,Total Screen Time,Social Networking,Reading and Reference,Other,Productivity,Health and Fitness,Entertainment,Creativity,Yoga,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Day,Month,Year
0,187,89,17,41,22,0,0,0,0,0,0,0,0,0,1,17,4,19
1,123,78,17,8,9,0,0,0,0,0,0,0,1,0,0,18,4,19
2,112,52,40,8,4,0,3,0,0,0,0,0,0,0,0,19,4,19
3,101,69,9,38,2,0,3,0,0,0,1,0,0,0,0,20,4,19
4,56,35,2,43,3,0,1,1,0,0,0,1,0,0,0,21,4,19


### Checking Column Datatypes

In [399]:
# Every remaining column should now be numeric, as required by the model.
df.dtypes

Total Screen Time        int64
Social Networking        int64
Reading and Reference    int64
Other                    int64
Productivity             int64
Health and Fitness       int64
Entertainment            int64
Creativity               int64
Yoga                     int64
Monday                   uint8
Saturday                 uint8
Sunday                   uint8
Thursday                 uint8
Tuesday                  uint8
Wednesday                uint8
Day                      int64
Month                    int64
Year                     int64
dtype: object

## 3) Building the Model

### Splitting into X and y

In [400]:
# Target: the Yoga column; features: every other column.
y = df["Yoga"]
X = df.drop(columns=["Yoga"])

### Splitting into Train and Test

In [401]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [402]:
# (rows, columns) of the train and test feature matrices.
tuple(part.shape for part in (X_train, X_test))

((19, 17), (9, 17))

In [403]:
# Lengths of the train and test target vectors; they must match the row
# counts of the corresponding feature matrices.
tuple(part.shape for part in (y_train, y_test))

((19,), (9,))

### Logistic Regression

In [404]:
from sklearn.linear_model import LogisticRegression

# Raise the solver's iteration cap to 1000 via max_iter.
logmodel = LogisticRegression(
    max_iter=1000,
)

### Training the Model

In [405]:
# Fit on the training partition only; the test rows stay unseen until evaluation.
logmodel.fit(X_train, y_train)

## 4) Evaluating the Model

In [406]:
# Predicted class labels for the held-out rows.
predictions = logmodel.predict(X_test)

In [407]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix first (rows: true label, columns: predicted label), then a
# blank line, then the per-class precision / recall / F1 table.
print(
    confusion_matrix(y_test, predictions),
    classification_report(y_test, predictions),
    sep="\n\n",
)

[[3 1]
 [1 4]]

              precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.80      0.80      0.80         5

    accuracy                           0.78         9
   macro avg       0.78      0.78      0.78         9
weighted avg       0.78      0.78      0.78         9

