# **Predictive Analytics**

- Import Libraries

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

- Load Data

Column information:
- Radiation : solar radiation, in watts per square meter (W/m^2)
- Temperature : temperature, in degrees Fahrenheit
- Humidity : percent
- Barometric Pressure : Hg
- Wind Direction : degrees
- Wind Speed : miles per hour
- Sunrise & Sunset : Hawaii time

In [3]:
# Read the raw CSV (path is relative to the notebook's working directory) into the working DataFrame.
df = pd.read_csv('../data/raw/SolarPrediction.csv')

## Exploratory Data Analysis

In [None]:
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

## Data Preprocessing

In [None]:
def time_to_minutes(t):
    """Convert a clock time to minutes elapsed since midnight.

    Args:
        t: Object exposing ``hour``, ``minute`` and ``second`` attributes
            (for example a ``datetime.time``).

    Returns:
        float: Minutes since 00:00:00, with the seconds contributing the
        fractional part.
    """
    whole_minutes = t.hour * 60 + t.minute
    fractional_minutes = t.second / 60
    return whole_minutes + fractional_minutes

In [None]:
# Cast the two weather readings to float.
df = df.astype({'Temperature': float, 'Humidity': float})

# Parse the "HH:MM:SS" strings into datetime.time objects so that
# time_to_minutes can read their hour/minute/second attributes.
for clock_col in ('Time', 'TimeSunRise', 'TimeSunSet'):
    df[clock_col] = pd.to_datetime(df[clock_col], format="%H:%M:%S").dt.time

# Derive minute-based features, then drop the raw time/identifier columns.
# Later keyword arguments of assign() may read the columns created by
# earlier ones, so the declaration order below matters.
df = df.assign(
    SunRiseMinutes=lambda d: d['TimeSunRise'].apply(time_to_minutes),
    SunSetMinutes=lambda d: d['TimeSunSet'].apply(time_to_minutes),
    CurrentMinutes=lambda d: d['Time'].apply(time_to_minutes),
    MinutesSinceSunrise=lambda d: d['CurrentMinutes'] - d['SunRiseMinutes'],
    MinutesUntilSunset=lambda d: d['SunSetMinutes'] - d['CurrentMinutes'],
    DaylightDuration=lambda d: d['SunSetMinutes'] - d['SunRiseMinutes'],
).drop(columns=['UNIXTime', 'Data', 'Time', 'TimeSunRise', 'TimeSunSet'])

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df.
sns.heatmap(df.corr(), annot=True)


In [None]:
# Count missing values per column.
df.isna().sum()


In [None]:
# Labels of the numeric columns; the outlier loops iterate over these.
numeric =  df.select_dtypes(include="number").columns


In [None]:
# Report, per numeric column, how many rows lie outside the 1.5 * IQR fences.
for i in numeric:
    Q1, Q3 = df[i].quantile([0.25, 0.75])
    IQR = Q3 - Q1
    lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

    outside_fences = df[i].lt(lower) | df[i].gt(upper)
    outlier = df.loc[outside_fences]
    print(f"Kolom {i} : {len(outlier)} outlier")

In [None]:
def outlierhandling(series, frame=None):
    """Cap one column's values at its 1.5 * IQR fences (winsorizing).

    Values above ``Q3 + 1.5 * IQR`` are replaced by that upper fence and
    values below ``Q1 - 1.5 * IQR`` by that lower fence; values inside the
    fences and missing values are left untouched.

    Args:
        series: Label of the column to cap (a column name, not a Series).
        frame: DataFrame to operate on. When omitted, the notebook-level
            ``df`` is used, which keeps existing ``outlierhandling(col)``
            calls working unchanged.

    Returns:
        The same DataFrame object that was capped; the column is modified
        in place on that frame.
    """
    # Fall back to the notebook global only when no frame is supplied, so the
    # function can also be applied to other frames (e.g. a training split).
    target = df if frame is None else frame

    q1 = target[series].quantile(0.25)
    q3 = target[series].quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr

    # clip() applies both fences in one pass and leaves NaN values as they are.
    target[series] = target[series].clip(lower=lower, upper=upper)
    return target

In [None]:
# Cap every numeric column at its IQR fences.
# NOTE(review): `numeric` is built from all numeric columns of df, so the
# `Radiation` target appears to be capped as well, and the fences are computed
# before the train/test split - confirm both are intended.
for i in numeric:
    df = outlierhandling(i)

# Re-count the rows outside the (recomputed) 1.5 * IQR fences after capping.
for i in numeric:
    Q1, Q3 = df[i].quantile([0.25, 0.75])
    IQR = Q3 - Q1
    lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

    outside_fences = df[i].lt(lower) | df[i].gt(upper)
    outlier = df.loc[outside_fences]
    print(f"Kolom {i} : {len(outlier)} outlier")

In [None]:
# Separate the target column from the predictor columns.
y = df['Radiation']
X = df.drop(columns=['Radiation'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Modelling & Evaluation

## Testing