<a href="https://colab.research.google.com/github/Redwoods/Arduino/blob/master/ar-iot/py-pandas/iot_nano33_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Pandas: access to the remote json from MongoDB
- The json file is generated on the fly from the express server of Node.js.
- The data stored in MongoDB are saved in the json file.
- The data are composed of seven time series: temperature, humidity, luminosity, pressure, and the R, G, B ratios.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation, rc

## [데이터 로드 1.]
- loading real-time json file from MongoDB via web

- http://life21c.inje.ac.kr:3030/client_33.html

In [None]:
# loading real-time json file from MongoDB via web (CORS, port=3030)
# url="http://life21c.inje.ac.kr:3030/iot"
# df=pd.read_json(url)
# print('Large IoT data was retrieved successfully from MongoDB!')

## [데이터 로드 2.]
- Load the compressed data from github.com

In [None]:
# Load the compressed data from github.com
# https://towardsdatascience.com/4-awesome-ways-of-loading-ml-data-in-google-colab-9a5264c61966
# url_zip = 'https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip'
!wget https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip
!unzip iot_data_201005_raw.zip

In [None]:
# Read the unzipped raw CSV; index_col=False keeps the first column as data
# instead of using it as the row index.
df = pd.read_csv(filepath_or_buffer="iot_data_201005_raw.csv", index_col=False)
df.head(n=5)

In [None]:
# Last five rows of the raw frame.
df.tail(n=5)

In [None]:
# (rows, columns) of the raw frame together with its Python type.
(df.shape, type(df))

In [None]:
# First and last 'date' entries, selected positionally.
df['date'].iloc[:1], df['date'].iloc[-1:]

In [None]:
# Print column names, non-null counts and dtypes of the raw frame.
df.info()

In [None]:
# Summary statistics of the raw frame's columns.
df.describe()

In [None]:
# Save iot data in csv file.
# df.to_csv('iot_data_201005_raw.csv')

## Check data
- NaN
- missing value

In [None]:
# Missing values per column (divide by df.shape[0] and multiply by 100 for percentages).
df.isnull().sum(axis=0)

In [None]:
# Same count via isna(), the alias of isnull().
df.isna().sum(axis=0)

## Replace NaN with a constant value: Imputation
- NaN ==>> 33.3 (100/3)

In [None]:
# Replace every NaN in the frame with the constant 33.3, then confirm that
# no missing values remain.
df = df.fillna(value=33.3)
df.isna().sum(axis=0)

In [None]:
# for whole dataframe
# df = df.replace(np.nan, 33.3)
# df.isnull().sum()

## IoT data were cleaned!

In [None]:
# First five rows after the NaN replacement.
df.head(n=5)

## New DataFrame : iot_data
### DataFrame with date and sensor values (temperature, humidity, luminosity, pressure, r, g, b)

In [None]:
# Columns to keep: the timestamp, the four environmental signals,
# and the three colour ratios.
cols = (
    ['date']
    + ['temperature', 'humidity', 'luminosity', 'pressure']
    + ['r_ratio', 'g_ratio', 'b_ratio']
)

In [None]:
# Make iot data
# .copy() gives iot_data its own storage, so the index changes applied to it
# further down never touch df or trigger a SettingWithCopy warning.
iot_data = df[cols].copy()

In [None]:
# (rows, columns) of the selected sensor frame.
iot_data.shape

In [None]:
# Check the date column and the 7 signals.
iot_data.head(n=5)

In [None]:
# Print column names, non-null counts and dtypes of the sensor frame.
iot_data.info()

In [None]:
# Missing values per column of the sensor frame.
iot_data.isna().sum(axis=0)

In [None]:
# Save the cleaned sensor frame as CSV.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column; pass index=False if consumers should not see it.
iot_data.to_csv(path_or_buf='iot_data_201005_clean.csv')

## EDA

In [None]:
# Plot boxplot
# iot_data[['temperature','humidity','luminosity']].plot.box(title='Boxplot of temperature, humidity, and luminosity')
# iot_data.loc[:,'temperature':'luminosity'].plot.box(title='Boxplot of temperature, humidity, and luminosity')
# Select the signals by name, not by position: iloc[:,1:] assumes 'date' is still
# column 0 and would silently drop 'temperature' if this cell is re-run after
# 'date' has been moved into the index.
iot_data[cols[1:]].plot.box(title='Boxplot of all signals')
# plt.xticks(rotation=60)
plt.show()

### Plot time series of sensor data

In [None]:
# Peek at the frame before plotting the time series.
iot_data.head(n=5)

In [None]:
# Line plot of temperature against the date column; tick labels are turned
# vertical because the date strings are long.
iot_data.plot.line(x='date', y='temperature', title='temperature', figsize=(10,5))
plt.xticks(rotation=90)
plt.show()

### Set style of graph
- plt.style.use('fivethirtyeight')
- plt.style.use('ggplot')

In [None]:
# List the style-sheet names this matplotlib installation provides.
plt.style.available

In [None]:
# Select the style BEFORE plotting: a style sheet only updates rcParams, and those
# are read when the figure and its artists are created, so calling style.use()
# after .plot() restyles the next figure instead of this one.
# Listing 'default' first clears whatever an earlier style left behind.
plt.style.use(['default', 'fivethirtyeight']) # 'seaborn-pastel', 'fivethirtyeight'
iot_data.plot(x='date', y='temperature', figsize=(10,5), title='temperature')
plt.xticks(rotation=60)
plt.show()

In [None]:
# Select the style BEFORE plotting: a style sheet only updates rcParams, and those
# are read when the figure and its artists are created, so calling style.use()
# after .plot() restyles the next figure instead of this one.
# Listing 'default' first clears whatever an earlier style left behind.
plt.style.use(['default', 'ggplot'])
iot_data.plot(x='date', y='temperature', figsize=(10,5), title='temperature')
plt.xticks(rotation=60)
plt.show()

In [None]:
# Restore the default matplotlib style so the following figures are not
# affected by the style sheets tried above.
plt.style.use('default')

In [None]:
# Line plot of humidity against the date column.
iot_data.plot.line(x='date', y='humidity', title='humidity', figsize=(10,5))
plt.xticks(rotation=60)
plt.show()

In [None]:
# Line plot of luminosity against the date column.
iot_data.plot.line(x='date', y='luminosity', title='luminosity', figsize=(10,5))
plt.xticks(rotation=60)
plt.show()

In [None]:
# Line plot of pressure against the date column.
iot_data.plot.line(x='date', y='pressure', title='pressure', figsize=(10,5))
plt.xticks(rotation=90)
plt.show()

#### 다중그래프 (multiple graphs)

In [None]:
# Temperature and humidity drawn on one shared axes.
iot_data.plot.line(x='date', y=['temperature','humidity'],
                   title='temperature and humidity', figsize=(10,5))

In [None]:
# Temperature, humidity, luminosity and pressure drawn on one shared axes.
iot_data.plot.line(x='date',
                   y=['temperature','humidity','luminosity','pressure'],
                   title='temperature, humidity, luminosity and pressure',
                   figsize=(10,6))
plt.xticks(rotation=60)
plt.show()

#### Plot the mean of sensor data

In [None]:
# Column-wise mean of the three selected signals.
iot_data.loc[:, ['temperature','humidity','luminosity']].mean()

In [None]:
# Bar chart of the three means.
iot_data[['temperature','humidity','luminosity']].mean().plot(
    kind='bar',
    figsize=(8,5),
    title="Mean of temperature, humidity, and luminosity")

In [None]:
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names no longer resolve on current releases, so use whichever spelling
# this installation provides (falling back to the default style).
pastel_style = next(
    (name for name in ('seaborn-v0_8-pastel', 'seaborn-pastel')
     if name in plt.style.available),
    'default',
)
plt.style.use(pastel_style)   # fivethirtyeight, ggplot
iot_data[['temperature','humidity','luminosity']].mean().plot.bar(figsize=(6,3),
                                                                  title="Mean of temperature, humidity, and luminosity")

# Advanced graphics using pandas

### Plot the change of sensor data over various time spans.

> time span: 60 sec, 1 hour, 1 day, 1 week

In [None]:
# 'date' is still an ordinary column at this point.
iot_data.head(n=5)

#### Set date as index of timestamp

In [None]:
# Move 'date' from a column into the index.
# Reassigning (instead of inplace=True) yields a fresh frame, and the guard makes
# the cell safe to re-run: once 'date' is the index there is no such column left,
# and a second set_index('date') would raise KeyError.
if 'date' in iot_data.columns:
    iot_data = iot_data.set_index('date')

In [None]:
# 'date' now labels the rows.
iot_data.head(n=5)

In [None]:
# Inspect the index type and column dtypes before the datetime conversion.
iot_data.info()

In [None]:
# Convert the date index to a Datetime index -- important: resample() below
# requires a datetime-like index.
iot_data.index = pd.to_datetime(arg=iot_data.index)

In [None]:
# The index is now reported as a timestamp (Datetime) index.
iot_data.info() # Range index -> timestamp(Datetime) index

In [None]:
# First rows with the datetime index in place.
iot_data.head(n=5)

In [None]:
# Estimate the mean of the iot data for every minute
# Lower-case 's' is the current alias for seconds; upper-case 'S' is deprecated
# since pandas 2.2, while the lower-case spelling is also accepted by older releases.
iot_data.resample('60s').mean()

In [None]:
#  Plot mean of the iot data per every minute
# '60s' replaces the deprecated upper-case '60S' alias (pandas >= 2.2).
iot_data.resample('60s').mean().plot(figsize=(8,5),
                                     title='Minutely change of temperature, humidity, and luminosity, and all')
# Anchor the legend just outside the right edge so it does not cover the curves.
plt.legend(bbox_to_anchor=(1.02, 1.0))

In [None]:
#  Plot mean of the R, G, B ratios per every minute
# '60s' replaces the deprecated upper-case '60S' alias (pandas >= 2.2).
# The colours are spelled as an explicit list, one entry per column; the original
# "rgb" string only worked because pandas splits it into single letters.
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('60s').mean().plot(figsize=(10,5),
                                     title='Minutely change of R, G, B',
                                     color=['r', 'g', 'b'])

In [None]:
# Estimate the mean of iot data for every hour
# Lower-case 'h' is the current alias for hours; upper-case 'H' is deprecated
# since pandas 2.2.
iot_data.resample('h').mean()  # mean per each hour

In [None]:
# Resample once and reuse the result. info() prints its report and returns None,
# so it is called on its own line instead of inside the displayed tuple, which
# would otherwise show a stray None.
# 'h' replaces the deprecated 'H' alias (pandas >= 2.2).
hourly_mean = iot_data.resample('h').mean()
hourly_mean.info()
hourly_mean.shape

In [None]:
#  Plot mean of the iot data per every hour
# 'h' replaces the deprecated 'H' alias (pandas >= 2.2).
iot_data.resample('h').mean().plot(figsize=(10,6),
                                   title='Hourly change of all sensor data') #,
                                #    ylim=[0,500])

In [None]:
#  Plot mean of the R, G, B ratios per every hour
# 'h' replaces the deprecated 'H' alias (pandas >= 2.2).
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('h').mean().plot(figsize=(10,6),
                                    color = ['red', 'green', 'blue'],
                                    title='Hourly change of R, G, B')

## [도전하기]  하루 24 시간 동안의 R,G,B 변화 그래프를 그려보시오.

In [None]:
# Daily mean of every sensor column.
iot_data.resample(rule='D').mean()

In [None]:
# Resample once and reuse the result. info() prints its report and returns None,
# so it is called on its own line instead of inside the displayed tuple, which
# would otherwise show a stray None.
daily_mean = iot_data.resample('D').mean()
daily_mean.info()
daily_mean.shape

In [None]:
# First five daily means.
iot_data.resample(rule='D').mean().head(n=5)

In [None]:
# Keep the daily means in their own frame and preview it.
iot_data_day = iot_data.resample(rule='D').mean()
iot_data_day.head(n=5)

In [None]:
# Print index type, non-null counts and dtypes of the daily-mean frame.
iot_data_day.info()

In [None]:
#  Plot mean of the iot data per every day
# figsize had been swallowed by the trailing "#marker='o', ms=6," comment; it is
# restored on its own line, matching the weekly bar charts below.
iot_data.resample('D').mean().plot(kind='bar', #marker='o', ms=6,
                                   figsize=(12,6),
                                   title='Daily change of all sensor data')

In [None]:
#  Plot mean of the R, G, B ratios per every day
# Title typo fixed ('Dayly' -> 'Daily').
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('D').mean().plot(figsize=(10,6),
                                    color = ['r','g','b'], #marker='o', ms=6,
                                    title='Daily change of R, G, B')

## 위의 그래프를 파이 그래프로 그려보시오.

In [None]:
# Daily mean of the three colour ratios, the input for the pie charts below.
# df.plot.pie(subplots=True, figsize=(6, 3))
iot_rgb = iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample(rule='D').mean()
iot_rgb.head(n=5)


In [None]:
# R, G, B means of the first and the second day.
(iot_rgb.iloc[0], iot_rgb.iloc[1])

In [None]:
# The daily index and the number of days it holds.
(iot_rgb.index, iot_rgb.index.size)

In [None]:
# Convert the first timestamp to a 'YYYY-MM-DD' date string.
iot_rgb.index[0].date().isoformat()

In [None]:
# Pie chart of the first day's R, G, B means. labeldistance=None hides the wedge
# labels, autopct prints each share with two decimals, and the y-label carries
# the date the chart belongs to.
iot_rgb.iloc[0].plot.pie(
    title='Daily change of R, G, B',
    subplots=True,
    figsize=(5,3),
    colors=['red', 'green', 'blue'],
    autopct='%.2f',
    labeldistance=None,
)
plt.ylabel(iot_rgb.index[0].strftime('%Y-%m-%d'))
plt.show()

## Weekly graph

In [None]:
# Weekly mean of every sensor column.
iot_data.resample(rule='W').mean()

In [None]:
# Bar chart of the weekly mean of all signals.
iot_data.resample('W').mean().plot.bar( #marker='o', ms=10,
    figsize=(12,6),
    title='Weekly change of temperature, humidity, and luminosity, pressure, R, G , B')

In [None]:
# Bar chart of the weekly mean temperature.
iot_data['temperature'].resample('W').mean().plot.bar( #marker='o', ms=10,
    figsize=(12,6),
    title='Weekly change of temperature')

In [None]:
# Bar chart of the weekly mean temperature and humidity.
iot_data[['temperature','humidity']].resample('W').mean().plot.bar( #marker='o', ms=10,
    figsize=(12,6),
    title='Weekly change of temperature and humidity')

# [도전] 다중 파이 그래프

### 다중 파이 그래프
- https://medium.com/@kvnamipara/a-better-visualisation-of-pie-charts-by-matplotlib-935b7667d77f

In [None]:
# Frame shape and number of days (one pie per day below).
(iot_rgb.shape, len(iot_rgb.index))

In [None]:
# One pie chart per day, laid out on a single row.
# squeeze=False keeps axs a 2-D array even when there is only one day; with the
# default, plt.subplots returns a bare Axes in that case and indexing it fails.
fig, axs = plt.subplots(nrows=1, ncols=iot_rgb.index.size, figsize=(20,5), squeeze=False)

fig.subplots_adjust(hspace=0.5, wspace=0.05)

# Draw on each Axes directly instead of passing it to fig.add_subplot() only to
# make it the "current" axes for the pyplot state machine.
for ax, (day, ratios) in zip(axs.flat, iot_rgb.iterrows()):
    ax.pie(ratios,
           colors = ['red', 'green', 'blue'], labeldistance=None,
           autopct='%.2f')  #, labels=df.columns)
    ax.set_ylabel(day.strftime('%Y-%m-%d'))
    # ax.axis('off')
    # plt.axis('off')

### axes array 코드 참조
```
for row in ax:
    for col in row:
        col.plot(x, y)
```

In [None]:
# Calendar-like grid of daily R, G, B pies, seven per row.
# squeeze=False keeps axs 2-D even when all days fit on one row; with the default,
# axs would be 1-D and a nested row/column loop would fail.
fig, axs = plt.subplots(nrows=(iot_rgb.index.size-1)//7+1, ncols=7, figsize=(16,16),
                        squeeze=False)
# axs.set_axis_off()

fig.subplots_adjust(hspace=0.5, wspace=0.05)
for idx, col in enumerate(axs.flat):
    if idx >= iot_rgb.index.size:
        # No more days: blank the leftover grid cell instead of repeating the
        # last day's pie in it.
        col.set_axis_off()
        continue
    col.pie(iot_rgb.iloc[idx],
            colors = ['red', 'green', 'blue'], labeldistance=None,
            autopct='%.2f')  #, labels=df.columns)

    col.set_ylabel(iot_rgb.index[idx].strftime('%Y-%m-%d'),fontsize=12)

    # plt.axis('off')
    # plt.grid(False)
plt.show()

## 와, Great!

---