
# 05WeatherCOVID19-Ver2

# Libraries

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from IPython.display import display

# Make seaborn's "pastel" palette the default colour cycle for plots drawn later.
sns.set_palette("pastel")

# Loading Data

In [2]:
# Raw string literal: the backslashes of the Windows path are kept verbatim
# instead of being read as (invalid) escape sequences such as "\p" or "\D",
# which newer Python versions warn about. The path value itself is unchanged.
# NOTE(review): this is an absolute, machine-specific location — consider a
# configurable data directory so the notebook can run on other machines.
WEATHER_CSV = r"C:\py\Projects\TuringCollege\COVID19\DataSets\weather.csv"

# Parse the "date" column as datetimes and use it as the frame's index.
weather = pd.read_csv(
    WEATHER_CSV,
    parse_dates=["date"],
    index_col="date",
)

# Display Data

In [3]:
# Use the full option names. Abbreviated patterns are matched as regular
# expressions against every registered option and raise OptionError when more
# than one option matches (e.g. "max_rows" matches both "display.max_rows" and
# "styler.render.max_rows" in recent pandas releases).
pd.set_option("display.max_columns", None)  # None = never truncate columns
pd.set_option("display.max_rows", None)  # None = never truncate rows

# Size

In [4]:
# (rows, columns) of the loaded weather frame.
weather.shape

(2896, 9)

# Data Types

In [5]:
# Data type pandas assigned to each column when reading the CSV.
weather.dtypes

code                       int64
province                  object
avg_temp                 float64
min_temp                 float64
max_temp                 float64
precipitation            float64
max_wind_speed           float64
most_wind_direction      float64
avg_relative_humidity    float64
dtype: object

# Missing Data

In [6]:
# Number of missing values in each column.
weather.isna().sum()

code                     0
province                 0
avg_temp                 0
min_temp                 0
max_temp                 0
precipitation            0
max_wind_speed           0
most_wind_direction      1
avg_relative_humidity    0
dtype: int64

# Delete Unneeded Columns

In [7]:
# Columns removed from the frame as unneeded.
unneeded_columns = [
    "code",
    "max_wind_speed",
    "most_wind_direction",
    "min_temp",
    "max_temp",
]
weather = weather.drop(columns=unneeded_columns)

# Rename Columns

In [8]:
# Map the raw snake_case column names to display-friendly titles.
# The renamed frame is rebound to `weather` instead of using inplace=True:
# in-place mutation brings no benefit here and prevents method chaining.
weather = weather.rename(
    columns={
        "province": "Province",
        "avg_temp": "Avg Temp",
        "precipitation": "Precipitation",
        "avg_relative_humidity": "Avg Relative Humidity",
    }
)

# Column Order

In [9]:
# Explicit left-to-right column order for the frame.
column_order = ["Province", "Avg Temp", "Precipitation", "Avg Relative Humidity"]
weather = weather.loc[:, column_order]

# Dataset

In [10]:
# Label the date index "Date" (the pivot further down refers to it by this
# name), then display the frame.
weather.index.name = "Date"
weather

Unnamed: 0_level_0,Province,Avg Temp,Precipitation,Avg Relative Humidity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,Seoul,-2.2,0.0,64.4
2020-01-01,Busan,1.9,0.0,44.0
2020-01-01,Daegu,0.2,0.0,53.3
2020-01-01,Gwangju,-0.3,0.0,58.0
2020-01-01,Incheon,-1.4,0.0,66.6
2020-01-01,Daejeon,-0.6,0.0,65.8
2020-01-01,Ulsan,1.3,0.0,50.4
2020-01-01,Gyeonggi-do,-2.2,0.0,79.3
2020-01-01,Gangwon-do,1.7,0.0,42.5
2020-01-01,Chungcheongbuk-do,-3.8,0.0,67.3


# Average Temp by Province

In [11]:
# Reshape to wide form: one row per Date, one column per Province,
# each cell holding the mean of "Avg Temp" for that date/province pair.
avg_temp_by_province = weather.pivot_table(
    values="Avg Temp",
    index="Date",
    columns="Province",
    aggfunc="mean",
)

In [12]:
# Provinces retained for the comparison; every other column is discarded.
provinces_to_keep = ["Daegu", "Gyeongsangbuk-do", "Seoul", "Gyeonggi-do"]
avg_temp_by_province = avg_temp_by_province.loc[:, provinces_to_keep]

In [13]:
# Convert the daily table to weekly frequency ("W").
# NOTE(review): asfreq keeps the row found at each weekly timestamp; it does
# not average the days of the week. Confirm that sampling one day per week is
# intended rather than a weekly mean such as resample("W").mean().
avg_temp_by_province = avg_temp_by_province.asfreq(freq='W')

In [14]:
# Remove the column-axis label left by the pivot and turn the Date index
# back into an ordinary column.
avg_temp_by_province = avg_temp_by_province.rename_axis(columns=None).reset_index()

In [15]:
# Discard the first rows by position; the remaining rows are kept unchanged.
# NOTE(review): the reason for skipping exactly three rows is not stated in
# the notebook — confirm the intended start date.
rows_to_skip = 3
avg_temp_by_province = avg_temp_by_province.iloc[rows_to_skip:]

In [16]:
# Put Date back as the index and display the resulting table.
avg_temp_by_province = avg_temp_by_province.set_index(keys="Date")
avg_temp_by_province

Unnamed: 0_level_0,Daegu,Gyeongsangbuk-do,Seoul,Gyeonggi-do
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-26,6.7,6.4,5.5,5.3
2020-02-02,3.6,4.7,1.5,1.0
2020-02-09,1.9,2.5,-0.5,-0.9
2020-02-16,5.0,4.5,-1.4,-1.1
2020-02-23,4.9,5.2,2.5,2.2
2020-03-01,9.4,8.3,5.8,4.8
2020-03-08,8.3,7.7,9.3,7.4
2020-03-15,6.2,5.6,4.0,4.5
2020-03-22,14.1,10.1,11.0,10.9
2020-03-29,8.7,6.4,8.7,7.4
