In [None]:
!pip install pandas numpy matplotlib neuralprophet scikit-learn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from neuralprophet import NeuralProphet

In [None]:
import os

# Directory that holds the raw input files.
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because later cells reference it.
dir = './input/'

# os.listdir() returns entries in arbitrary order, so sort them to make the
# listing (and any processing that iterates over `files`) reproducible.
files = sorted(os.listdir(dir))

# Print the list of files
for file in files:
    print(file)

In [None]:
# Collect one tidy dataframe per input file
df_list = []

# Loop over every entry found in the input directory
for file in files:
    path = os.path.join(dir, file)

    # Skip sub-directories (e.g. notebook checkpoint folders); only files can be parsed
    if not os.path.isfile(path):
        continue

    # Read the CSV file, decoding it as CP949
    df = pd.read_csv(path, encoding='CP949')

    # Keep the date column ('일시') and the mean-temperature column ('평균기온(°C)'),
    # renaming them to English identifiers
    df = df[['일시', '평균기온(°C)']].rename(columns={'일시': 'date', '평균기온(°C)': 'temperature'})

    # Convert 'date' to datetime format and extract year, month, and day
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day

    # Select the final columns and append the dataframe to the list
    df = df[['date', 'year', 'month', 'day', 'temperature']]
    df_list.append(df)

# Concatenate all the dataframes into a single dataframe and order it
# chronologically, so the result does not depend on the order in which the
# files were listed (a time series must be in date order downstream).
df = (
    pd.concat(df_list, ignore_index=True)
    .sort_values('date', kind='stable')
    .reset_index(drop=True)
)
df

In [None]:
# Year-based hold-out: rows up to and including 2022 form the training set,
# rows from 2023 onward form the test set. Both frames keep only the date and
# temperature columns, renamed to `ds` and `y`.
prophet_columns = {'date': 'ds', 'temperature': 'y'}
is_train = df['year'] <= 2022
is_test = df['year'] >= 2023

df_train = (
    df.loc[is_train, ['date', 'temperature']]
    .rename(columns=prophet_columns)
    .reset_index(drop=True)
)
df_test = (
    df.loc[is_test, ['date', 'temperature']]
    .rename(columns=prophet_columns)
    .reset_index(drop=True)
)

df_test.tail()

In [None]:
# Initialize the NeuralProphet model for temperature forecasting:
# trend disabled, yearly seasonality only.
model = NeuralProphet(
    growth='off',
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    epochs=100,
    learning_rate=0.1,
)

# Fit on the training split only. The held-out test split is passed solely as
# validation data, so the validation metrics measure out-of-sample performance
# (fitting on df_test itself would leak the evaluation data into training).
metrics = model.fit(df_train, freq='D', validation_df=df_test, progress='plot')

# Extend 365 daily periods past the end of df_test and predict them
future = model.make_future_dataframe(df_test, periods=365)
pred = model.predict(future)

In [None]:
# Visualize yhat1 against the actual values
forecast = model.predict(df_test)
plot_columns = ['ds', 'y', 'yhat1']
fig = model.plot(forecast[plot_columns])
fig.show()

In [None]:
forecast = model.predict(df_test)
# Highlight the 1st forecast step in subsequent plots; the return value is
# rebound to `model`.
model = model.highlight_nth_step_ahead_of_each_forecast(1)
# Zoom in on the last 3*24 = 72 forecast rows. The model is fitted with
# freq='D', so these are 72 daily rows rather than 3 days of data.
# (`m` was never defined in this notebook; the fitted model is `model`.)
fig = model.plot(forecast[-3*24:])
fig.show()

In [None]:
# Plot the fitted model's parameters.
# (`m` was never defined in this notebook; the fitted model is `model`.)
fig_param = model.plot_parameters()
fig_param.show()

In [None]:
# Plot the forecast with test data
plt.figure(figsize=(10, 6), dpi=100)
plt.plot(df_test['y'].reset_index(drop=True), c='r')
plt.plot(forecast['yhat'][-len(df_test):].reset_index(drop=True), c='b')
plt.show()

In [None]:
from scipy.stats import pearsonr

# Pearson correlation coefficient (and its p-value) between the observed test
# values and the trailing len(df_test) predictions
actual = df_test['y']
predicted = forecast['yhat1'][-len(df_test):]
r, p = pearsonr(actual, predicted)

print(f'Pearson correlation: {r}')
print(f'p-value: {p}')

In [None]:
from sklearn.metrics import mean_squared_error

# Calculate the mean squared error between the observed test values and the
# trailing len(df_test) predictions. The prediction column is `yhat1`, as used
# by the other cells of this notebook; the forecast frame has no `yhat` column.
mse = mean_squared_error(df_test['y'], forecast['yhat1'][-len(df_test):])

# Print the mean squared error
print('Mean Squared Error:', mse)
