#**Standard initial check on one .CSV file**

In [2]:
import pandas as pd
df = pd.read_csv('/content/World_Happiness_Project/raw_data/world_happiness_2015.csv', sep=';', encoding='utf-8', engine='python')
df.head()


Unnamed: 0,Ranking,Country,Regional indicator,Happiness score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Switzerland,Western Europe,759,826,96,73,99,37,24
1,2,Iceland,Western Europe,756,77,1,73,94,55,74
2,3,Denmark,Western Europe,753,784,97,70,97,43,12
3,4,Norway,Western Europe,752,863,95,71,1,44,34
4,5,Canada,North America and ANZ,743,785,94,71,95,58,4


#**Data pre-processing, Data cleaning and specifying the maste .CSV file structure**


In [4]:
import pandas as pd
import os

folder_path = '/content/World_Happiness_Project/raw_data'
files = sorted([f for f in os.listdir(folder_path) if f.endswith('.csv')])

column_map = {
    'Country': 'Country',
    'Country name': 'Country',
    'Regional indicator': 'Region',
    'Happiness score': 'Happiness_Score',
    'Ladder score': 'Happiness_Score',
    'Life Ladder': 'Happiness_Score',
    'GDP per capita': 'GDP_per_Capita',
    'Logged GDP per capita': 'GDP_per_Capita',
    'Social support': 'Social_Support',
    'Healthy life expectancy': 'Healthy_Life_Expectancy',
    'Freedom to make life choices': 'Freedom',
    'Generosity': 'Generosity',
    'Perceptions of corruption': 'Corruption_Perception'
}

dataframes = []

for file in files:
    try:
        year = int(file[-8:-4])
        if year == 2024:
          print("🔍 Columns in 2024 file:", df.columns.tolist())

        path = os.path.join(folder_path, file)
        print(f"✅ Reading: {file} (Year: {year})")

        # Read with ; separator and , as decimal
        df = pd.read_csv(path, sep=';', engine='python', encoding='utf-8', decimal=',')

        df.columns = [col.strip() for col in df.columns]
        df = df.rename(columns=column_map)
        df['Year'] = year

        required_cols = ['Country', 'Region', 'Happiness_Score', 'GDP_per_Capita',
                         'Social_Support', 'Healthy_Life_Expectancy', 'Freedom',
                         'Generosity', 'Corruption_Perception', 'Year']

        df = df[[col for col in required_cols if col in df.columns]]
        df = df[df['Country'].notna() & df['Happiness_Score'].notna()]
        dataframes.append(df)

    except Exception as e:
        print(f"❌ Error reading {file}: {e}")

# Combine all years
final_df = pd.concat(dataframes, ignore_index=True)
final_df.to_csv('/content/World_Happiness_Project/output/happiness_master.csv', index=False)
print("✅ All files processed and saved to 'happiness_master.csv'")


✅ Reading: world_happiness_2015.csv (Year: 2015)
✅ Reading: world_happiness_2016.csv (Year: 2016)
✅ Reading: world_happiness_2017.csv (Year: 2017)
✅ Reading: world_happiness_2018.csv (Year: 2018)
✅ Reading: world_happiness_2019.csv (Year: 2019)
✅ Reading: world_happiness_2020.csv (Year: 2020)
✅ Reading: world_happiness_2021.csv (Year: 2021)
✅ Reading: world_happiness_2022.csv (Year: 2022)
✅ Reading: world_happiness_2023.csv (Year: 2023)
🔍 Columns in 2024 file: ['Country', 'Region', 'Happiness_Score', 'GDP_per_Capita', 'Social_Support', 'Healthy_Life_Expectancy', 'Freedom', 'Generosity', 'Corruption_Perception', 'Year']
✅ Reading: world_happiness_2024.csv (Year: 2024)
✅ All files processed and saved to 'happiness_master.csv'


#**Checking whether the master .CSV file in the output folder has been processed correctly or not**

In [5]:
df = pd.read_csv('/content/World_Happiness_Project/output/happiness_master.csv')

# Check structure
print(df.columns)
print(df.dtypes)

# Preview
print(df.head())
print(df.tail())

# Check nulls
print(df.isnull().sum())


Index(['Country', 'Region', 'Happiness_Score', 'GDP_per_Capita',
       'Social_Support', 'Healthy_Life_Expectancy', 'Freedom', 'Generosity',
       'Corruption_Perception', 'Year'],
      dtype='object')
Country                     object
Region                      object
Happiness_Score            float64
GDP_per_Capita             float64
Social_Support             float64
Healthy_Life_Expectancy    float64
Freedom                    float64
Generosity                 float64
Corruption_Perception      float64
Year                         int64
dtype: object
       Country                 Region  Happiness_Score  GDP_per_Capita  \
0  Switzerland         Western Europe             7.59            8.26   
1      Iceland         Western Europe             7.56            7.70   
2      Denmark         Western Europe             7.53            7.84   
3       Norway         Western Europe             7.52            8.63   
4       Canada  North America and ANZ             7.43       