<a href="https://colab.research.google.com/github/HaojingGao/APEC-portfolio/blob/main/Entropy_Method.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ingest

In [2]:
import numpy as np
import pandas as pd


# Entropy Method

In [19]:
# Workbook holding the indicator table
file_path = '/APECData_normalized.xlsx'

# Pull the sheet into a DataFrame
data_frame = pd.read_excel(file_path)

# Names of the 18 indicator columns whose values are converted to floats below
numerical_columns = [
    'Traffic_Risk',
    'Personal_Risk',
    'Change_Trend',
    'Alcohol_Deaths',
    'Seatbelt_Use',
    'Helmet_Use',
    'Non_Motorcycle_Vehicles',
    'Vehicle_Standards',
    'Road_Safety_Audits',
    'Urban_Population',
    'GDP_per_Capita',
    'Life_Expectancy',
    'Adult_Literacy',
    'Speed_Limit_Enforcement',
    'Drink_Driving_Enforcement',
    'Seatbelt_Enforcement',
    'Helmet_Use_Enforcement',
    'HDI',
]

# Relabel the columns by position: the first one is the country label and the
# remaining ones are the indicators, in the order listed above.
data_frame.columns = ['Country'] + numerical_columns

# Keep only the rows that carry a country label
data_frame_clean = data_frame.dropna(subset=['Country'])

# Indicator values as a 2-D float array (one row per kept record,
# one column per indicator)
Z = data_frame_clean.loc[:, numerical_columns].astype(float).to_numpy()


# Define mylog function to handle log(0) error
def mylog(p):
    """Natural logarithm that returns 0 (instead of -inf) for zero entries.

    Zero entries are replaced by 1 before the logarithm is taken, so that
    ``p * mylog(p)`` evaluates to 0 where ``p`` is 0.

    Parameters
    ----------
    p : array_like or scalar
        Values to take the logarithm of. Lists and tuples are accepted as
        well as NumPy arrays.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``log(p)`` elementwise, with 0 wherever ``p`` is 0.
    """
    # Coerce first: comparing a plain Python list with 0 gives a single False
    # rather than an elementwise mask, which would skip the zero guard.
    p = np.asarray(p)
    return np.log(np.where(p == 0, 1, p))

m, n = Z.shape    # m = number of rows, n = number of columns (indicators) of Z
if m < 2:
    # The entropy below is divided by log(m), which is 0 when m == 1.
    raise ValueError("The entropy method needs at least two rows of data.")

D = np.zeros(n)   # Degree of divergence (1 - entropy) of each indicator

for i in range(n):
    x = Z[:, i]   # Values of the i-th indicator
    column_total = np.sum(x)
    if column_total == 0:
        # x / 0 would give NaN and turn every weight into NaN. For
        # non-negative data a zero total means the column is all zeros, i.e.
        # constant; a constant column has entropy 1, so its divergence is 0.
        D[i] = 0.0
    else:
        # Share of each row in the column total
        probability = x / column_total
        # Entropy of the shares, scaled by log(m); mylog returns 0 for zero
        # shares so they contribute nothing to the sum.
        entropy = -np.sum(probability * mylog(probability)) / np.log(m)
        D[i] = 1 - entropy

# Normalize the divergences so that the weights sum to 1
weights = D / np.sum(D)

if not np.all(np.isfinite(weights)):
    # Reached when the divergences sum to 0, or when Z contains NaN or
    # values that lead to negative shares.
    raise ValueError(
        "Entropy weights are not finite; check Z for NaN or negative values, "
        "or for indicators that are all constant."
    )

# Convert weights to DataFrame (one row per indicator, indexed by indicator name)
weights_df = pd.DataFrame(weights, index=numerical_columns, columns=['Weight'])

# Add the indicator names as the first column, because the index is not
# written to the output file below.
weights_df.insert(0, 'Indicators', numerical_columns)

# Round the weights to three decimal places. They are kept numeric rather
# than formatted into strings, so the spreadsheet cells hold numbers and not
# text.
weights_df['Weight'] = weights_df['Weight'].round(3)

# Save the indicator weights into a new local file
output_file_path = '/APECData_weights.xlsx'
weights_df.to_excel(output_file_path, index=False)




