In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import random

# Load data

In [2]:
# Input files are looked up in a "data" folder directly under the current
# working directory, so the notebook must be started from the project root
# (or wherever that folder lives).
base = os.getcwd()
data_path = os.path.join(base, 'data')

def load_data(name):
    """Read ``<data_path>/<name>.npy`` and return whatever ``np.load`` yields.

    Parameters
    ----------
    name : str
        File stem, without the ``.npy`` extension.
    """
    npy_file = os.path.join(data_path, f"{name}.npy")
    return np.load(npy_file)

def reshape(data):
    """Flatten every leading axis of ``data`` into rows, keeping the last axis.

    An array of shape ``(n, h, w, c)`` becomes ``(n * h * w, c)``. The row
    count is inferred from the array itself instead of assuming a fixed
    40x40 grid, so inputs with a different spatial size are handled too;
    for ``(n, 40, 40, c)`` inputs the result is unchanged.

    Parameters
    ----------
    data : numpy.ndarray
        Array whose last axis holds the per-row values.

    Returns
    -------
    numpy.ndarray
        2-D array of shape ``(data.size // data.shape[-1], data.shape[-1])``.
    """
    # -1 lets NumPy derive the row count from the total number of elements.
    return data.reshape(-1, data.shape[-1])

In [3]:
# Read data/dl_train.npy and collapse it to a 2-D array: the leading axes are
# merged into rows and the last axis is kept as the columns.
data = reshape(load_data('dl_train'))

# Labels for the 15 columns of the flattened array: nine temp_* columns,
# then 'type', the GMI and DPR longitude/latitude pairs, and finally
# 'precipitation' (the column later plotted on the y axis).
columns = [
    *(f"temp_{channel}" for channel in range(9)),
    'type',
    'GMI_long', 'GMI_lat',
    'DPR_long', 'DPR_lat',
    'precipitation',
]

In [4]:
# Wrap the flattened array in a DataFrame with named columns. Note that this
# rebinds `data` from a NumPy array to a DataFrame; every later cell relies on
# the DataFrame form.
data = pd.DataFrame(data, columns=columns)
print(data.shape)

(121561600, 15)


In [5]:
# Working subset: the first 6,080,000 rows of `data` (row labels 0..6079999).
# The original note also lists 3648000, presumably an alternative cutoff.
tdata = data.iloc[:6080000]  # 3648000
display(tdata)

Unnamed: 0,temp_0,temp_1,temp_2,temp_3,temp_4,temp_5,temp_6,temp_7,temp_8,type,GMI_long,GMI_lat,DPR_long,DPR_lat,precipitation
0,174.677109,97.509834,203.541229,143.567032,241.500748,222.171906,170.156082,276.060486,259.399963,0.0,159.494385,5.641016,159.641464,5.574192,0.0
1,174.911652,96.689583,203.997940,143.496780,240.993332,222.621750,169.170502,274.717133,256.798950,0.0,159.534912,5.609135,159.641464,5.574192,0.0
2,173.597321,97.981293,204.016159,143.279175,241.584793,221.940430,168.437149,274.446960,257.048035,0.0,159.575806,5.577742,159.641464,5.574192,0.0
3,174.986160,96.862984,201.832352,141.562363,239.778030,220.447968,166.742813,272.904358,253.840561,0.0,159.617081,5.546840,159.625137,5.532823,0.0
4,174.257904,96.435356,202.192291,142.080582,239.759964,221.311798,168.050186,272.944885,253.975052,0.0,159.658707,5.516435,159.669113,5.510320,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6079995,265.104980,263.576111,265.324097,264.116608,266.495239,265.552551,265.011230,265.368225,264.259094,111.0,108.384804,32.029858,108.366180,32.039097,0.0
6079996,265.083405,263.406403,266.032715,263.645172,267.008209,266.255249,263.995361,266.396973,265.043823,111.0,108.431709,32.063107,108.415306,32.064346,0.0
6079997,264.160980,262.510468,265.456329,263.353577,267.393402,265.695557,264.539307,267.371033,264.580353,110.0,108.477974,32.096664,108.465103,32.089935,0.0
6079998,263.488861,262.320984,265.694000,264.193695,265.173767,265.762451,264.122559,266.515045,265.698608,111.0,108.523994,32.130829,108.514961,32.115562,0.0


# lmplot

In [None]:
# Scatter plus fitted regression line of precipitation against temp_0,
# drawn from the tdata subset rather than the full frame.
sns.lmplot(x='temp_0', y='precipitation', data=tdata, height=10, aspect=2)

In [None]:
# One regression plot of precipitation against every other column.
# The fit runs on the `tdata` subset, consistent with the single-column
# lmplot cell: lmplot bootstraps a confidence band by default, which is
# impractical on the full 121,561,600-row `data` frame.
for col in columns[:-1]:
    sns.lmplot(x=col, y='precipitation', data=tdata, height=10, aspect=2)
    plt.show()  # render this figure before the next one is created

# box plot

In [None]:
# Box plots: one panel for each column except the last ('precipitation'),
# laid out on a 7-row by 2-column grid of equally sized cells.
fig = plt.figure(figsize=(10, 16))
gs = gridspec.GridSpec(7, 2, height_ratios=[1] * 7, width_ratios=[1] * 2)

for slot, col in enumerate(columns[:-1]):
    ax = fig.add_subplot(gs[slot])  # grid cells are filled row by row
    ax.boxplot(data[col])
    ax.set_title(col)

# plt.subplots_adjust(wspace=2, hspace=2)
plt.show()

# Scatter

In [None]:
# Scatter plots of precipitation against each of the nine temp_* columns,
# placed on a 5x2 grid of equally sized cells (the tenth cell stays empty).
fig = plt.figure(figsize=(16, 16))
gs = gridspec.GridSpec(5, 2, height_ratios=[1] * 5, width_ratios=[1] * 2)

for i, col in enumerate(f"temp_{k}" for k in range(9)):
    ax = fig.add_subplot(gs[i])
    # The string arguments are resolved as column names through data=.
    ax.scatter(col, 'precipitation', data=data)
    ax.set_title(col)

# plt.subplots_adjust(wspace=2, hspace=2)
plt.show()