In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load settings from config.yaml
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open("config.yaml", 'r', encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [None]:
# Pull the locations of the two merged CSV files out of the loaded configuration.
merged_historical_data_path = (
    config['prepare_historical_data']['data']['merged_historical_data_path']
)
merged_future_data_path = (
    config['prepare_future_data']['data']['merged_future_data_path']
)

In [None]:
# Read both merged CSV files with identical options; 'Date' is parsed as datetimes.
hist_data, future_data = (
    pd.read_csv(csv_path, parse_dates=['Date'])
    for csv_path in (merged_historical_data_path, merged_future_data_path)
)

In [None]:
# Print the column dtypes and non-null counts of the historical dataset.
hist_data.info()

In [None]:
# Print the column dtypes and non-null counts of the future dataset.
future_data.info()

In [None]:
# Stack the historical rows followed by the future rows into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the
# index labels of both inputs are kept and repeat in the combined frame,
# which makes any later label-based access (.loc, alignment) ambiguous.
data = pd.concat([hist_data, future_data], ignore_index=True)

In [None]:
# Preview the first rows of the combined dataset.
data.head()

In [None]:
# Preview the last rows of the combined dataset.
data.tail()

In [None]:
# Print the column dtypes and non-null counts of the combined dataset.
data.info()

### Scale Parameters Using MinMaxScaler

#### OP_Price

In [None]:
# Report the range of the raw OP_Price column, rounded to two decimals.
op_price_raw = data['OP_Price']
print('Min OP Price -', round(op_price_raw.min(), 2))
print('Max OP Price -', round(op_price_raw.max(), 2))

In [None]:
# MinMax scaling of OP_Price: fit the scaler on the column, then apply it.
scaler = MinMaxScaler().fit(data[['OP_Price']])
data['OP_Price_Scaled'] = scaler.transform(data[['OP_Price']])

In [None]:
# Report the range of the scaled OP_Price column, rounded to two decimals.
op_price_scaled = data['OP_Price_Scaled']
print('Min Scaled OP Price -', round(op_price_scaled.min(), 2))
print('Max Scaled OP Price -', round(op_price_scaled.max(), 2))

#### AVPI

In [None]:
# Report the range of the raw AVPI column, rounded to two decimals.
avpi_raw = data['AVPI']
print('Min AVPI -', round(avpi_raw.min(), 2))
print('Max AVPI -', round(avpi_raw.max(), 2))

In [None]:
# MinMax scaling of AVPI: fit the scaler on the column, then apply it.
scaler = MinMaxScaler().fit(data[['AVPI']])
data['AVPI_Scaled'] = scaler.transform(data[['AVPI']])

In [None]:
# Report the range of the scaled AVPI column, rounded to two decimals.
avpi_scaled = data['AVPI_Scaled']
print('Min Scaled AVPI -', round(avpi_scaled.min(), 2))
print('Max Scaled AVPI -', round(avpi_scaled.max(), 2))

#### PR

In [None]:
# Report the range of the raw PR column, rounded to two decimals.
pr_raw = data['PR']
print('Min PR -', round(pr_raw.min(), 2))
print('Max PR -', round(pr_raw.max(), 2))

In [None]:
# MinMax scaling of PR: fit the scaler on the column, then apply it.
scaler = MinMaxScaler().fit(data[['PR']])
data['PR_Scaled'] = scaler.transform(data[['PR']])

In [None]:
# Report the range of the scaled PR column, rounded to two decimals.
pr_scaled = data['PR_Scaled']
print('Min Scaled PR -', round(pr_scaled.min(), 2))
print('Max Scaled PR -', round(pr_scaled.max(), 2))

#### LAR

In [None]:
# Report the range of the raw LAR column, rounded to two decimals.
lar_raw = data['LAR']
print('Min LAR -', round(lar_raw.min(), 2))
print('Max LAR -', round(lar_raw.max(), 2))

In [None]:
# MinMax scaling of LAR: fit the scaler on the column, then apply it.
scaler = MinMaxScaler().fit(data[['LAR']])
data['LAR_Scaled'] = scaler.transform(data[['LAR']])

In [None]:
# Report the range of the scaled LAR column, rounded to two decimals.
lar_scaled = data['LAR_Scaled']
print('Min Scaled LAR -', round(lar_scaled.min(), 2))
print('Max Scaled LAR -', round(lar_scaled.max(), 2))

In [None]:
# List the column labels of the combined frame, now including the *_Scaled columns added above.
data.columns

In [None]:
# Output location for the combined frame, read from the loaded configuration.
all_parameters_merged_data_path = (
    config['merge_and_scale_data']['data']['all_parameters_merged_data_path']
)

In [None]:
# Write the combined frame to CSV; the DataFrame index is not written.
data.to_csv(path_or_buf=all_parameters_merged_data_path, index=False)

### Find Correlation

In [None]:
# Function to calculate correlation and normalize it
def calculate_normalized_correlations(data, target_column):
    """
    Calculate normalized absolute correlations of all columns with the target column.

    Only numeric and boolean columns take part in the correlation, so a stray
    text or datetime column in ``data`` is ignored instead of making
    ``DataFrame.corr`` fail.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the target column and the candidate feature columns.
    target_column : str
        Name of the numeric column every other column is correlated against.

    Returns
    -------
    pandas.Series
        Absolute correlation of each remaining numeric column with
        ``target_column``, divided by the sum of those absolute correlations,
        rounded to two decimals and sorted in descending order. Because of
        the rounding the values may not sum to exactly 1. If every
        correlation is zero or undefined, the result contains NaN.

    Raises
    ------
    KeyError
        If ``target_column`` is missing from ``data`` or is not numeric.
    """
    # Drop non-numeric columns up front so the result does not depend on the
    # pandas version's default for ``numeric_only``.
    numeric_data = data.select_dtypes(include=['number', 'bool'])
    if target_column not in numeric_data.columns:
        raise KeyError(
            f"Target column {target_column!r} is missing from the data or is not numeric"
        )

    correlation_matrix = numeric_data.corr()
    # The target's correlation with itself is always 1 and carries no information.
    absolute_correlations = correlation_matrix[target_column].abs().drop(target_column)
    normalized_correlations = absolute_correlations / absolute_correlations.sum()
    normalized_correlations = normalized_correlations.round(2).sort_values(ascending=False)
    print("Normalized Correlations:")
    print(normalized_correlations)
    return normalized_correlations


In [None]:
# Function to calculate weightage ranges based on normalized weights
def calculate_weightage_ranges(weights, k=1):
    """
    Calculate weightage ranges for each parameter based on its weight.

    Each range is ``weight ± k * std``, where ``std`` is the standard
    deviation of all weights as computed by ``numpy.std``. The lower bound is
    clamped at 0; the upper bound is not clamped.

    Parameters
    ----------
    weights : dict or pandas.Series
        Mapping of parameter name to weight. Any object exposing ``.items()``
        that yields ``(name, weight)`` pairs is accepted.
    k : int or float, default 1
        Number of standard deviations to extend on each side of the weight.

    Returns
    -------
    dict
        ``{parameter: (low, high)}`` in the iteration order of ``weights``.
        Empty input yields an empty dict.
    """
    # Normalise to a plain dict so a Series (where ``.values`` is an attribute,
    # not a method) works the same way as a dict.
    weights = dict(weights.items())

    weight_values = np.array(list(weights.values()))
    # np.std on an empty array emits a RuntimeWarning and returns NaN; with no
    # weights there is nothing to spread, so skip the computation.
    std_dev_weight = np.std(weight_values) if weight_values.size else 0.0
    spread = k * std_dev_weight

    ranges = {
        param: (max(weight - spread, 0), weight + spread)
        for param, weight in weights.items()
    }

    print("\nWeightage Ranges for Each Parameter:")
    for param, (low, high) in ranges.items():
        print(f"{param}: [{low:.2f}, {high:.2f}]")

    return ranges


In [None]:
# Correlation analysis: derive per-parameter weights from each scaled
# parameter's correlation with 'Votable Supply' and plot the results.
# The guard keeps the analysis from running if this code is imported as a module.
if __name__ == "__main__":

    # Reload the persisted CSV and keep only the scaled parameters plus the target.
    data = pd.read_csv(all_parameters_merged_data_path)
    data = data[['OP_Price_Scaled', 'AVPI_Scaled', 'PR_Scaled', 'LAR_Scaled', 'Votable Supply']]

    # Step 1: Calculate and display the correlation matrix
    correlation_matrix = data.corr()
    print("\nCorrelation Matrix:")
    print(correlation_matrix)

    # Step 2: Plot the heatmap of the correlation matrix
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
    plt.title('Correlation Heatmap')
    plt.show()

    # Step 3: Calculate normalized correlations
    target_column = 'Votable Supply'
    normalized_correlations = calculate_normalized_correlations(data, target_column)

    # Step 4: Use normalized correlations as weights to calculate ranges
    weights = normalized_correlations.to_dict()
    weightage_ranges = calculate_weightage_ranges(weights)

    # Step 5: Plot Normalized Correlations
    plt.figure(figsize=(10, 6))
    bars = plt.bar(normalized_correlations.index, normalized_correlations.values, color='skyblue')

    # Annotate every bar with its value, placed slightly above the bar top.
    for bar in bars:
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + 0.01,
            f'{bar.get_height():.2f}',
            ha='center', va='bottom', fontsize=10, color='black'
        )

    plt.title('Normalized Correlations with Votable Supply')
    plt.xlabel('Feature')
    plt.ylabel('Normalized Correlation')
    plt.xticks(rotation=45)
    plt.show()

    # Step 6: Plot Weightage Ranges
    weightage_low = [low for low, _ in weightage_ranges.values()]
    weightage_high = [high for _, high in weightage_ranges.values()]
    weightage_width = [high - low for low, high in weightage_ranges.values()]
    params = list(weightage_ranges.keys())

    plt.figure(figsize=(10, 6))
    # Draw each range as a floating bar from its low to its high bound rather
    # than painting a white bar over the lower part, which only looks right on
    # a white background and hides gridlines.
    plt.barh(params, weightage_width, left=weightage_low, color='lightcoral', label='Range')
    # Keep the x-axis anchored at 0 so the offset of each range stays visible.
    plt.xlim(left=0)
    plt.title('Weightage Ranges for Each Parameter')
    plt.xlabel('Weight Range')
    plt.ylabel('Parameter')
    plt.legend()
    plt.show()