# In [2]:
import pandas as pd
import math
import random
import sqlite3
import numpy as np

# Define a simple linear regression function
def LinearRegressionFit(X, y):
    """Fit y = b0 + b1 * x by ordinary least squares.

    Args:
        X: Sequence of numeric predictor values.
        y: Sequence of numeric response values, same length as ``X``.

    Returns:
        A ``(b0, b1)`` tuple: intercept and slope of the fitted line.
        When every value in ``X`` is identical (including the single-point
        case) no slope can be estimated, so a horizontal line through the
        mean of ``y`` is returned: ``(mean(y), 0.0)``.

    Raises:
        ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
    """
    n = len(X)
    if n == 0:
        raise ValueError("LinearRegressionFit requires at least one observation")
    if len(y) != n:
        raise ValueError("X and y must have the same length")

    x_mean = sum(X) / n
    y_mean = sum(y) / n

    # Sum of cross-deviations (Sxy) and sum of squared x-deviations (Sxx).
    numerator = sum((xi - x_mean) * (yi - y_mean) for xi, yi in zip(X, y))
    denominator = sum((xi - x_mean) ** 2 for xi in X)

    # Zero spread in X means the slope is undefined; fall back to a flat
    # line instead of dividing by zero.
    if denominator == 0:
        return float(y_mean), 0.0

    b1 = numerator / denominator
    b0 = y_mean - b1 * x_mean

    return b0, b1

# Columns that are each modelled with their own straight-line trend.
feature_columns = [
    'Electricity from hydro (TWh)',
    'Electricity from wind (TWh)',
    'Electricity from solar (TWh)',
    'Other renewables including bioenergy (TWh)',
    'Total (TWh)',
]

# Forecast horizon: every year from 2021 through 2050 inclusive.
years = [*range(2021, 2051)]

# Collects one result dictionary per entity.
all_predictions = []

# Read the historical CSV and discard every row that has a missing value
# in any column.
data = pd.read_csv('C:/Users/PREDATOR/Documents/college project/renewable.csv').dropna()

# Split the rows into one group per distinct value of the 'Entity' column.
grouped_data = data.groupby(by='Entity')

# Function to calculate the probability of achieving the predicted outcome
def calculate_probability(prediction, prediction_interval):
    """Label a prediction by whether it lies inside an interval.

    Args:
        prediction: The predicted value.
        prediction_interval: Indexable pair ``(lower, upper)``.

    Returns:
        ``"High"`` when ``lower <= prediction <= upper`` (bounds
        inclusive), otherwise ``"Low"``.
    """
    lower = prediction_interval[0]
    upper = prediction_interval[1]
    inside = lower <= prediction <= upper
    return "High" if inside else "Low"

# Function to calculate the confidence level in terms of percentage
def calculate_probability_percentage(prediction, prediction_interval):
    """Return a heuristic confidence score, in percent, for a prediction.

    The score is NOT a statistical probability. It is built as follows:

    1. Base value: 95 when the prediction lies inside the interval
       (bounds inclusive); otherwise 100 minus the distance to the nearest
       bound expressed as a percentage of the interval width, floored at 0.
    2. A uniform random jitter in [-5, 5] is added, so repeated calls with
       the same arguments can return different values.
    3. The result is clamped to [90, 100] and rounded to two decimals.

    Args:
        prediction: The predicted value.
        prediction_interval: Indexable pair ``(lower, upper)``.

    Returns:
        A number between 90 and 100 inclusive.
    """
    lower = prediction_interval[0]
    upper = prediction_interval[1]

    if lower <= prediction <= upper:
        probability = 95  # Inside the interval: nominal 95 % base score.
    else:
        prediction_range = upper - lower
        prediction_distance = min(abs(prediction - lower), abs(prediction - upper))
        if prediction_range == 0:
            # A zero-width interval that misses the prediction: the relative
            # distance is unbounded, so use the lowest base score rather
            # than dividing by zero.
            probability = 0
        else:
            probability = max(0, 100 - (prediction_distance / prediction_range) * 100)

    # Add or subtract a random value between -5 and 5 to the probability.
    probability += random.uniform(-5, 5)
    # Clamp so the reported score always stays between 90 and 100.
    probability = max(90, min(100, probability))
    return round(probability, 2)

# For every entity, fit one straight-line trend per feature column and
# extrapolate it over each year in `years`. One dictionary per entity is
# appended to `all_predictions`, shaped as:
#   {'Entity': <name>,
#    <column>: {'Predictions': [...], 'Prediction Intervals': [(lo, hi), ...],
#               'Probabilities': [...], 'Probabilities Percentage': [...]}}
for entity, entity_data in grouped_data:
    predictions = {'Entity': entity}

    # The regressor (Year) and the statistics derived from it alone are the
    # same for every feature column and every forecast year of this entity,
    # so they are computed once here instead of inside the inner loops.
    X = entity_data['Year'].tolist()
    n = len(X)
    x_mean = sum(X) / n
    SSxx = sum((x - x_mean) ** 2 for x in X)

    for column in feature_columns:
        y = entity_data[column].tolist()

        # Intercept and slope of the trend line for this entity and column.
        b0, b1 = LinearRegressionFit(X, y)

        # Spread of the observed values; constant across forecast years.
        # NOTE(review): this is the standard deviation of y itself, not the
        # residual standard error of the fit, so the band below is a
        # heuristic rather than a textbook 95 % interval - confirm intent.
        y_std = np.std(y)

        column_predictions = []
        prediction_intervals = []
        probabilities = []
        probabilities_percentage = []

        for year in years:
            # Generation cannot be negative, so clamp the extrapolation at 0.
            prediction = b0 + b1 * year
            if prediction < 0:
                prediction = 0

            # Half-width of the band; it grows as `year` moves away from the
            # mean of the observed years.
            std_error = 1.96 * math.sqrt(1/n + (year - x_mean) ** 2 / SSxx) * y_std
            prediction_interval = (prediction - std_error, prediction + std_error)

            # NOTE(review): the interval is centred on `prediction`, so the
            # label is "High" whenever std_error is a valid number.
            probability = calculate_probability(prediction, prediction_interval)
            probability_percentage = calculate_probability_percentage(prediction, prediction_interval)

            column_predictions.append(prediction)
            prediction_intervals.append(prediction_interval)
            probabilities.append(probability)
            probabilities_percentage.append(probability_percentage)

        # Store the per-year results for the current column.
        predictions[column] = {
            'Predictions': column_predictions,
            'Prediction Intervals': prediction_intervals,
            'Probabilities': probabilities,
            'Probabilities Percentage': probabilities_percentage
        }

    # Store the predictions for the current entity.
    all_predictions.append(predictions)


# Persist one row per (entity, forecast year) into the SQLite table
# `Predictions`.
# NOTE(review): the file name 'confidence_levl.db' looks like a misspelling
# of "level"; it is left unchanged because the path is runtime behaviour.
conn = sqlite3.connect('confidence_levl.db')
try:
    cursor = conn.cursor()

    # Create a table if it doesn't exist. The table has no uniqueness
    # constraint, so running this script again appends a second copy of
    # every row.
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS Predictions (
        Entity TEXT,
        Year INTEGER,
        Electricity_from_hydro TEXT,
        Electricity_from_wind TEXT,
        Electricity_from_solar TEXT,
        Other_renewables TEXT,
        Total TEXT,
        Probability TEXT,
        Probability_Percentage TEXT
    )
''')

    # Result columns in the order of the five per-source table columns
    # (hydro, wind, solar, other renewables, total).
    source_columns = (
        'Electricity from hydro (TWh)',
        'Electricity from wind (TWh)',
        'Electricity from solar (TWh)',
        'Other renewables including bioenergy (TWh)',
        'Total (TWh)',
    )

    rows = []
    for prediction_data in all_predictions:
        entity = prediction_data['Entity']
        # Index by position in `years` rather than `year - 2021`, so the
        # lookup stays correct if the forecast range is ever changed.
        for index, year in enumerate(years):
            # "High"/"Low" label of each source for this year.
            labels = [
                prediction_data[column]['Probabilities'][index]
                for column in source_columns
            ]
            # Percentage score of each source for this year.
            percentages = [
                prediction_data[column]['Probabilities Percentage'][index]
                for column in source_columns
            ]
            # `Probability` receives the score of the 'Total (TWh)' column;
            # `Probability_Percentage` receives the highest score among all
            # five sources.
            total_percentage = prediction_data['Total (TWh)']['Probabilities Percentage'][index]
            rows.append((
                entity,
                year,
                *labels,
                total_percentage,
                max(float(value) for value in percentages),
            ))

    # `with conn` commits when the block succeeds and rolls back if it raises.
    with conn:
        cursor.executemany('''
            INSERT INTO Predictions(Entity, Year, Electricity_from_hydro, Electricity_from_wind, Electricity_from_solar, Other_renewables, Total, Probability, Probability_Percentage)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', rows)
finally:
    # Always release the database handle, even when something above failed.
    conn.close()

