# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Load and prepare the data: parse the 'Date' column, order the rows
#    chronologically and use the dates as the index.
df = (
    pd.read_excel('train_data.xlsx')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
)

# 2. Basic statistics: correlation matrix of the production, temperature
#    and cloud-cover columns.
corr = df.loc[:, ['Üretim (kWh)', 'Temp_mean_C', 'CloudCover_%']].corr()

# 3. Görselleştirmeler
def plot_histogram(data, column, title, xlabel, bins=20):
    """Draw a histogram of a single column on a new matplotlib figure.

    Args:
        data: DataFrame that contains ``column``. Any other object (for
            example a Series) is plotted as-is, so calls that relied on
            the earlier behaviour of ignoring ``column`` keep working.
        column: Label of the column whose values are plotted.
        title: Title of the figure.
        xlabel: Label of the x-axis.
        bins: Bin specification forwarded to ``plt.hist``.

    The figure is only created here; showing it (``plt.show()``) is left
    to the caller.
    """
    # Select just the requested column. Passing the whole frame to
    # plt.hist would draw every column in one chart regardless of `column`.
    values = data[column] if isinstance(data, pd.DataFrame) else data
    plt.figure()
    plt.hist(values.dropna(), bins=bins)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    plt.tight_layout()

# 3.1 Histograms: one figure per column; the column name doubles as the
#     x-axis label.
histogram_specs = (
    ('Üretim (kWh)', 'Histogram of Daily Production'),
    ('Temp_mean_C', 'Histogram of Mean Daily Temperature'),
    ('CloudCover_%', 'Histogram of Daily Cloud Cover %'),
)
for column_name, chart_title in histogram_specs:
    plot_histogram(df, column_name, chart_title, column_name)

# 3.2 Scatter plots: production against each of the two weather columns,
#     each on its own figure.
scatter_specs = (
    ('Temp_mean_C', 'Production vs. Temperature'),
    ('CloudCover_%', 'Production vs. Cloud Cover'),
)
for feature, chart_title in scatter_specs:
    plt.figure()
    plt.scatter(df[feature], df['Üretim (kWh)'], alpha=0.7)
    plt.title(chart_title)
    plt.xlabel(feature)
    plt.ylabel('Üretim (kWh)')
    plt.tight_layout()

# 3.3 Correlation matrix heat map
corr_values = corr.to_numpy()
# The annotation colour flips at the middle of the plotted value range:
# white text is hard to read on the bright end of viridis, which is where
# the diagonal (1.00) cells always end up.
text_threshold = (np.nanmin(corr_values) + np.nanmax(corr_values)) / 2

plt.figure(figsize=(6, 5))
plt.imshow(corr, interpolation='nearest', cmap='viridis')
plt.title('Correlation Matrix')
plt.xticks(range(len(corr.columns)), corr.columns, rotation=45)
plt.yticks(range(len(corr.index)), corr.index)
# Write each coefficient into its cell (x = column position, y = row position).
for (row, col), value in np.ndenumerate(corr_values):
    plt.text(
        col, row, f"{value:.2f}",
        ha='center', va='center',
        color='black' if value > text_threshold else 'white',
    )
plt.colorbar()
plt.tight_layout()

# 3.4 Time-series trends
# 30-day moving average of production (window=30 rows of the date-indexed frame)
rolling_prod = df['Üretim (kWh)'].rolling(window=30).mean()
plt.figure()
plt.plot(rolling_prod.index, rolling_prod.to_numpy())
plt.gca().set(
    title='30-Day Rolling Mean of Production',
    xlabel='Date',
    ylabel='Rolling Mean Üretim (kWh)',
)
plt.tight_layout()

# Monthly average production, drawn as one bar per calendar month.
# NOTE(review): the 'M' alias is deprecated in recent pandas releases in
# favour of 'ME' — confirm the target pandas version before switching.
monthly_avg = df['Üretim (kWh)'].resample('M').mean()
month_labels = monthly_avg.index.strftime('%Y-%m')
plt.figure()
plt.bar(month_labels, monthly_avg.to_numpy())
plt.gca().set(
    title='Monthly Average Production',
    xlabel='Month',
    ylabel='Average Üretim (kWh)',
)
plt.xticks(rotation=45)
plt.tight_layout()

# 3.5 Extra chart: seasonal decomposition (optional)
from statsmodels.tsa.seasonal import seasonal_decompose

# NOTE(review): seasonal_decompose is expected to need at least two full
# periods (2 * 365 rows) without missing values — confirm against the data.
result = seasonal_decompose(df['Üretim (kWh)'], model='additive', period=365)
# DecomposeResult.plot() creates and returns its own figure, so no
# plt.figure() call is made beforehand (it would only leave an empty
# extra figure behind). The title and layout are applied to that figure.
decomposition_fig = result.plot()
decomposition_fig.suptitle('Seasonal Decomposition of Production', y=1.02)
decomposition_fig.tight_layout()

# 4. Show: display every figure created above
plt.show()
