In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("./future_predictions.csv")
df

Unnamed: 0,Country,Year,Gender_Female,Gender_Male,Gender_Total,Age_Under18,Age_Over18,Age_Total,Predicted_VALUE
0,Ecuador,2023,0.360768,0.279835,0.359396,0.157750,0.248285,0.593964,82.200190
1,Ecuador,2024,0.367984,0.285432,0.366584,0.160905,0.253251,0.605844,81.982330
2,Ecuador,2025,0.375199,0.291029,0.373772,0.164060,0.258217,0.617723,81.017390
3,Ecuador,2026,0.382414,0.296626,0.380960,0.167215,0.263182,0.629602,79.830390
4,Ecuador,2027,0.389630,0.302222,0.388148,0.170370,0.268148,0.641481,78.538180
...,...,...,...,...,...,...,...,...,...
730,Malaysia,2025,0.206722,0.202324,0.650954,0.167137,0.224315,0.668548,47.223114
731,Malaysia,2026,0.210622,0.206141,0.663237,0.170290,0.228548,0.681162,46.419624
732,Malaysia,2027,0.214523,0.209959,0.675519,0.173444,0.232780,0.693776,45.642390
733,Malaysia,2028,0.218423,0.213776,0.687801,0.176598,0.237012,0.706390,44.645996


* slope analysis

In [2]:
combinations = [
    # Gender-specific age groups
    ('Female', 'Under18', 'Female_Under18'),
    ('Female', 'Over18', 'Female_Over18'),
    ('Female', 'Total', 'Female_AllAges'),
    ('Male', 'Under18', 'Male_Under18'),
    ('Male', 'Over18', 'Male_Over18'), 
    ('Male', 'Total', 'Male_AllAges'),
    ('Total', 'Under18', 'AllGenders_Under18'),
    ('Total', 'Over18', 'AllGenders_Over18'),
    ('Total', 'Total', 'AllGenders_AllAges')
]

new_rows = []
for _, row in df.iterrows():
    for gender, age, demo_name in combinations:
        gender_col = 'Gender_Total' if gender == 'Total' else f'Gender_{gender}'
        age_col = 'Age_Total' if age == 'Total' else f'Age_{age}'
        
        prob = row[gender_col] * row[age_col] * 100
        
        if prob > 0.01:  
            new_rows.append({
                'Country': row['Country'],
                'Year': row['Year'],
                'Gender': 'AllGenders' if gender == 'Total' else gender,
                'Age': 'AllAges' if age == 'Total' else age,
                'Demographic': demo_name,
                'Probability': prob,
                'VALUE': row['Predicted_VALUE']
            })

new_df = pd.DataFrame(new_rows).sort_values(['Country', 'Year', 'Demographic'])
new_df.insert(0, 'Id', range(1, len(new_df)+1))

# Column order
final_columns = [
    'Id', 'Country', 'Year', 'Demographic', 
    'Gender', 'Age', 'Probability', 'VALUE'
]
new_df = new_df[final_columns]

In [3]:
new_df

Unnamed: 0,Id,Country,Year,Demographic,Gender,Age,Probability,VALUE
3860,1,Albania,2023,AllGenders_AllAges,AllGenders,AllAges,38.174139,62.179344
3859,2,Albania,2023,AllGenders_Over18,AllGenders,Over18,9.724360,62.179344
3858,3,Albania,2023,AllGenders_Under18,AllGenders,Under18,7.072262,62.179344
3854,4,Albania,2023,Female_AllAges,Female,AllAges,15.025991,62.179344
3853,5,Albania,2023,Female_Over18,Female,Over18,3.827673,62.179344
...,...,...,...,...,...,...,...,...
1432,6611,Uzbekistan,2029,Female_Over18,Female,Over18,13.639407,91.643660
1431,6612,Uzbekistan,2029,Female_Under18,Female,Under18,9.547585,91.643660
1436,6613,Uzbekistan,2029,Male_AllAges,Male,AllAges,14.772422,91.643660
1435,6614,Uzbekistan,2029,Male_Over18,Male,Over18,10.761367,91.643660


In [4]:
#new_df.to_csv('Demographics.csv', index=False)

In [5]:
new_df = new_df.sort_values(['Country', 'Demographic', 'Year'])

new_df['YoY_Change'] = new_df.groupby(['Country', 'Demographic'])['VALUE'].diff()
new_df['YoY_Pct_Change'] = new_df.groupby(['Country', 'Demographic'])['VALUE'].pct_change() * 100

def yearly_interpretation(row):
    if pd.isna(row['YoY_Pct_Change']):
        return "First Year"
    elif row['YoY_Pct_Change'] > 5:
        return "Strong increase"
    elif row['YoY_Pct_Change'] > 1:
        return "Increase"
    elif abs(row['YoY_Pct_Change']) <= 1:
        return "Stable"
    elif row['YoY_Pct_Change'] < -1:
        return "Declining"
    else:
        return "Strong decline"

new_df['Yearly_Trend'] = new_df.apply(yearly_interpretation, axis=1)

In [6]:
from scipy.stats import linregress

def rolling_slope(group):
    group = group.sort_values('Year')
    if len(group) >= 3:
        slopes = []
        for i in range(len(group)-2):
            window = group.iloc[i:i+3]
            slope = linregress(window['Year'], window['VALUE'])[0]
            slopes.append(slope)
        # Align slopes with the middle year of each window
        group['Rolling_Slope'] = [np.nan] + slopes + [np.nan]
    return group

new_df = new_df.groupby(['Country', 'Demographic']).apply(rolling_slope)

# Interpret rolling slopes
new_df['Rolling_Trend'] = pd.cut(new_df['Rolling_Slope'],
                                bins=[-np.inf, -0.5, -0.1, 0.1, 0.5, np.inf],
                                labels=['Sharp decline', 'Mild decline', 'Stable', 
                                        'Mild growth', 'Strong growth'])

  new_df = new_df.groupby(['Country', 'Demographic']).apply(rolling_slope)


In [7]:
new_df = new_df.reset_index(drop=True)

new_df['Baseline'] = new_df.groupby(['Country', 'Demographic'])['VALUE'].transform('first')

new_df['Cumulative_Growth'] = ((new_df['VALUE'] - new_df['Baseline']) / new_df['Baseline']) * 100

# Add growth interpretation
new_df['Growth_Stage'] = np.where(
    new_df['Cumulative_Growth'] > 0,
    "Above baseline", 
    "Below baseline"
)

# Verify results
print(new_df[['Country', 'Year', 'Demographic', 'VALUE', 'Baseline', 'Cumulative_Growth']].head())

   Country  Year         Demographic      VALUE   Baseline  Cumulative_Growth
0  Albania  2023  AllGenders_AllAges  62.179344  62.179344           0.000000
1  Albania  2024  AllGenders_AllAges  62.335835  62.179344           0.251677
2  Albania  2025  AllGenders_AllAges  62.094128  62.179344          -0.137049
3  Albania  2026  AllGenders_AllAges  60.939846  62.179344          -1.993424
4  Albania  2027  AllGenders_AllAges  58.816320  62.179344          -5.408587


In [8]:
print(f"Final row count: {len(new_df)}")  # Should still be 6,578
print(new_df[['Country', 'Year', 'Demographic', 'YoY_Change', 'Yearly_Trend']].head())

Final row count: 6615
   Country  Year         Demographic  YoY_Change Yearly_Trend
0  Albania  2023  AllGenders_AllAges         NaN   First Year
1  Albania  2024  AllGenders_AllAges    0.156491       Stable
2  Albania  2025  AllGenders_AllAges   -0.241707       Stable
3  Albania  2026  AllGenders_AllAges   -1.154282    Declining
4  Albania  2027  AllGenders_AllAges   -2.123526    Declining


In [9]:
new_df

Unnamed: 0,Id,Country,Year,Demographic,Gender,Age,Probability,VALUE,YoY_Change,YoY_Pct_Change,Yearly_Trend,Rolling_Slope,Rolling_Trend,Baseline,Cumulative_Growth,Growth_Stage
0,1,Albania,2023,AllGenders_AllAges,AllGenders,AllAges,38.174139,62.179344,,,First Year,,,62.179344,0.000000,Below baseline
1,10,Albania,2024,AllGenders_AllAges,AllGenders,AllAges,39.716374,62.335835,0.156491,0.251677,Stable,-0.042608,Stable,62.179344,0.251677,Above baseline
2,19,Albania,2025,AllGenders_AllAges,AllGenders,AllAges,41.289149,62.094128,-0.241707,-0.387750,Stable,-0.697995,Sharp decline,62.179344,-0.137049,Below baseline
3,28,Albania,2026,AllGenders_AllAges,AllGenders,AllAges,42.892463,60.939846,-1.154282,-1.858923,Declining,-1.638904,Sharp decline,62.179344,-1.993424,Below baseline
4,37,Albania,2027,AllGenders_AllAges,AllGenders,AllAges,44.526316,58.816320,-2.123526,-3.484626,Declining,-1.925001,Sharp decline,62.179344,-5.408587,Below baseline
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6610,6579,Uzbekistan,2025,Male_Under18,Male,Under18,6.512796,101.490130,-3.091170,-2.955758,Declining,-2.957495,Sharp decline,108.937970,-6.836771,Below baseline
6611,6588,Uzbekistan,2026,Male_Under18,Male,Under18,6.760881,98.666310,-2.823820,-2.782359,Declining,-2.711495,Sharp decline,108.937970,-9.428907,Below baseline
6612,6597,Uzbekistan,2027,Male_Under18,Male,Under18,7.013602,96.067140,-2.599170,-2.634303,Declining,-2.477938,Sharp decline,108.937970,-11.814825,Below baseline
6613,6606,Uzbekistan,2028,Male_Under18,Male,Under18,7.270961,93.710434,-2.356706,-2.453186,Declining,-2.211740,Sharp decline,108.937970,-13.978171,Below baseline


In [10]:
new_df.to_csv('Safety_Trends.csv', index=False)

In [11]:
columns_to_drop = ["Gender", "Age", "YoY_Pct_Change", "Growth_Stage"]
db_df = new_df.drop(columns=columns_to_drop)
db_df = [
    "Id", "Country", "Year", "VALUE", "Demographic", "Probability",
    "Baseline", "YoY_Pct_Change", "Yearly_Trend",
    "Rolling_Slope", "Rolling_Trend", "Cumulative_Growth"
]

db_df = new_df[db_df]

db_df.to_csv("SAFETY_FOR_IMPORT.csv", index=False)