# 1) Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# 2) Loading Dataset

In [None]:
# Location of the telemetry CSV, relative to the notebook's working directory.
DATA_PATH = '../input/environmental-sensor-data-132k/iot_telemetry_data.csv'

# Read the raw sensor readings into a DataFrame and display it.
iot_data = pd.read_csv(DATA_PATH)
iot_data

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
iot_data.info()

# 3) Preprocessing of Data

In [None]:
# 'ts' is interpreted as seconds since the Unix epoch (unit='s'), so the
# parsed 'time_stamp' column carries a full date and time. pop() removes the
# raw 'ts' column while handing its values to the parser.
# NOTE: this mutates iot_data; re-running the cell without reloading the CSV
# fails because 'ts' no longer exists.
iot_data['time_stamp'] = pd.to_datetime(iot_data.pop('ts'), unit='s')
print(iot_data.head())

In [None]:
# Correlation heatmap of the sensor readings.
# Only numeric and boolean columns are correlated: pandas >= 2.0 no longer
# drops non-numeric columns silently in DataFrame.corr() (numeric_only now
# defaults to False) and can raise on them, e.g. on the datetime 'time_stamp'
# column created above. Selecting the columns explicitly reproduces the old
# behaviour on every pandas version.
numeric_columns = iot_data.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_columns.corr())

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Integer-encode the 'device', 'light' and 'motion' columns.
# One LabelEncoder instance is re-fitted for each column in turn, so after
# this cell it only retains the classes of the last column ('motion').
labelencoder = LabelEncoder()
Devices, Light, Motion = (
    labelencoder.fit_transform(iot_data[column_name])
    for column_name in ('device', 'light', 'motion')
)

# Instantiated here but not used anywhere in the visible notebook.
onehotencoder = OneHotEncoder()

In [None]:
# Overwrite the original columns with their integer-encoded versions.
encoded_columns = {'device': Devices, 'light': Light, 'motion': Motion}
for column_name, encoded_values in encoded_columns.items():
    iot_data[column_name] = encoded_values
iot_data

In [None]:
# iot_data is already a DataFrame (it comes from pd.read_csv); this wraps it
# in a new DataFrame object under the name used by the rest of the notebook.
# NOTE(review): no explicit copy is requested, so the two frames may share
# underlying data depending on the pandas version — confirm before relying
# on iot_data staying unchanged.
iot_data_df = pd.DataFrame(iot_data)
# Preview the first rows.
iot_data_df.head()

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
iot_data_df.isna().sum()

In [None]:
# Rescale 'temp' with value * 1.8 + 32, i.e. the Celsius -> Fahrenheit formula.
# NOTE(review): this assumes the raw column is in degrees Celsius — confirm
# against the dataset description.
# The column is overwritten in place, so re-running this cell rescales the
# already-converted values again.
iot_data_df['temp'] = (iot_data_df['temp'] * 1.8) + 32
iot_data_df

# 4) Visualization

In [None]:
# Pie chart of five sensor quantities.
# NOTE(review): the slice sizes below are hard-coded and are not computed
# from iot_data_df — confirm where these weights come from.
affect = ['co', 'humidity', 'lpg', 'smoke', 'temp']
# Named 'slice_sizes' rather than 'slice' so the built-in slice type is not
# shadowed for every later cell in the kernel.
slice_sizes = [3, 7, 8, 6, 9]
color = ['r', 'g', 'm', 'b', 'c']

plt.pie(
    slice_sizes,
    labels=affect,
    colors=color,
    startangle=90,
    shadow=True,
    explode=(0, 0, 0, 0.1, 0),  # pull the 'smoke' wedge out slightly
    autopct='%1.2f%%',
)
plt.legend(bbox_to_anchor=(0.85, 1.20), ncol=2)
plt.show()

In [None]:
# Row counts per device, split by the encoded 'motion' flag.
sns.set_style('darkgrid')
# The plotted column is passed as x=...: seaborn >= 0.12 accepts only `data`
# positionally, so a positional 'device' collides with data=iot_data_df and
# raises a TypeError.
sns.countplot(
    x='device',
    hue='motion',
    palette="rocket",
    edgecolor=sns.color_palette("dark", 3),
    linewidth=2,
    data=iot_data_df,
)

In [None]:
# Readings over time per device, coloured by 'motion' and styled by 'light'.
# x and y are passed by keyword: seaborn >= 0.12 made them keyword-only, so
# the positional form raises a TypeError.
sns.scatterplot(
    x='device',
    y='time_stamp',
    hue='motion',
    style='light',
    data=iot_data_df,
)

In [None]:
# The timestamp was only needed for the plot above; remove it before
# building the model inputs.
iot_data_df = iot_data_df.drop(columns=['time_stamp'])
iot_data_df.head()

# 5) Splitting Dataset into Train & Test

In [None]:
# Separate the model inputs from the label to predict.
target_column = 'motion'
x = iot_data_df.drop(columns=target_column)   # every column except the label
y = iot_data_df[target_column].to_numpy()     # label values as a NumPy array
y

In [None]:
# Display the feature matrix (every column of iot_data_df except 'motion').
x

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible. The split is not stratified on y.
split_parts = train_test_split(x, y, test_size=0.05, random_state=0)
X_train, X_test, y_train, y_test = split_parts

# 6) Training and Predicting

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with scikit-learn's default settings.
# fit() returns the estimator itself, so the chained call binds the fitted
# model to `reg`; the trailing expression displays it as before.
reg = LogisticRegression().fit(X_train, y_train)
reg

In [None]:
# Predict the 'motion' label for every held-out row with the fitted classifier.
prediction = reg.predict(X_test)
# Bare expression so the notebook displays the predicted labels.
prediction

In [None]:
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Confusion matrix of the held-out predictions: rows are the actual classes,
# columns are the predicted classes.
cnf_matrix = confusion_matrix(y_test, prediction)

sns.heatmap(cnf_matrix, annot=True, cmap="Spectral", fmt='g', linewidth=3)
plt.title('Confusion matrix')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
# tight_layout runs after the title and axis labels are set so that the
# layout accounts for them.
plt.tight_layout()
plt.show()

In [None]:
# Report the headline classification metrics on the held-out split.
for metric_label, metric_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(metric_label, metric_fn(y_test, prediction))

> # Only 482 rows have motion = True, which is about 0.1% of the roughly 405k rows in the dataset; all the others are False (about 99.9%). With classes this imbalanced, accuracy alone does not show how well the model predicts motion.
> 
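> # Accuracy is approximately 100%, but this is misleading: because almost every row is False, a model that always predicts False would score about the same, so the high accuracy does not show that the model has learned to detect motion.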
> 
> # Precision = 0, which means that none of the rows the model predicted as True were actually True (or that it predicted no True rows at all).
> 
> # Recall = 0, which means that none of the rows that are actually True were identified by the model.