# In [None]:
import pandas as pd
from sklearn.linear_model import Ridge  
import numpy as np
import os


def combine_and_sort_predictions(hourly_predictions_dir):
    """Merge the per-hour prediction files and order them by ``trend``.

    Looks for ``hour_1_predictions.csv`` through ``hour_24_predictions.csv``
    inside *hourly_predictions_dir*. Every file that exists must provide a
    ``Predicted`` and a ``trend`` column. Missing files are reported on
    stdout and skipped.

    Args:
        hourly_predictions_dir: Directory holding the per-hour CSV files.

    Returns:
        A 1-D NumPy array with every ``Predicted`` value, ordered by
        ascending ``trend``. Rows sharing the same ``trend`` keep their load
        order (hour first, then row position within the file). An empty
        array is returned when none of the files exists.
    """
    all_predictions = []
    all_trends = []

    for hour in range(1, 25):
        file_path = f'{hourly_predictions_dir}/hour_{hour}_predictions.csv'
        if not os.path.exists(file_path):
            print(f"file {file_path} not exist")
            continue

        predictions_df = pd.read_csv(file_path)
        all_predictions.append(predictions_df['Predicted'].values)
        all_trends.append(predictions_df['trend'].values)

    # np.concatenate raises ValueError on an empty list, so handle the
    # "no file found" case explicitly instead of failing obscurely.
    if not all_predictions:
        return np.array([])

    all_predictions = np.concatenate(all_predictions)
    all_trends = np.concatenate(all_trends)

    # A stable sort keeps the output deterministic when several rows share
    # the same trend value; the default quicksort gives no such guarantee.
    sorted_indices = np.argsort(all_trends, kind='stable')
    return all_predictions[sorted_indices]


def train_and_predict(train_dir='./train_splits',
                      test_dir='./test_splits',
                      predictions_dir='./hourly_predictions',
                      output_file='prediction_Ridge.csv',
                      alpha=1.0):
    """Fit one Ridge model per hour and write the combined predictions.

    For every hour from 1 to 24 the function reads
    ``X_train_hour_{hour}.csv`` and ``Y_train_hour_{hour}.csv`` from
    *train_dir* and ``X_test_hour_{hour}.csv`` from *test_dir*, fits a
    ``Ridge`` regressor, and stores the test predictions together with the
    test set's ``trend`` column in
    ``{predictions_dir}/hour_{hour}_predictions.csv``. The per-hour files are
    then merged by ``combine_and_sort_predictions`` and written to
    *output_file* as a single ``Predicted`` column.

    Args:
        train_dir: Directory containing the per-hour training CSV files.
        test_dir: Directory containing the per-hour test feature CSV files.
        predictions_dir: Directory for the per-hour prediction files; it is
            created if it does not exist.
        output_file: Path of the final combined CSV file.
        alpha: Regularisation strength passed to ``Ridge``.

    Returns:
        None. All results are written to disk.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(predictions_dir, exist_ok=True)

    for hour in range(1, 25):
        X_train = pd.read_csv(f'{train_dir}/X_train_hour_{hour}.csv')
        Y_train = pd.read_csv(f'{train_dir}/Y_train_hour_{hour}.csv')
        # Only the test features are needed to produce predictions, so the
        # test targets are not loaded.
        X_test = pd.read_csv(f'{test_dir}/X_test_hour_{hour}.csv')

        model = Ridge(alpha=alpha)
        model.fit(X_train, Y_train)
        Y_test_pred = model.predict(X_test)

        # Y_train is a DataFrame (2-D target), so the prediction is indexed
        # as 2-D here; keep the first target column.
        test_results = pd.DataFrame({
            'Predicted': Y_test_pred[:, 0],
            # trend is stored alongside so the hours can be re-ordered later.
            'trend': X_test['trend'].values,
        })
        test_results.to_csv(
            f'{predictions_dir}/hour_{hour}_predictions.csv', index=False)

    sorted_predictions = combine_and_sort_predictions(predictions_dir)
    sorted_predictions_df = pd.DataFrame(sorted_predictions,
                                         columns=['Predicted'])
    sorted_predictions_df.to_csv(output_file, index=False)

# Entry point: runs the full pipeline with its default paths as soon as this
# code is executed (per-hour training, per-hour CSVs, combined CSV).
train_and_predict()