# In [None]:
import pandas as pd

class DataProcessor:
    """
    Load, clean and merge three CSV datasets into one country-level table.

    The three sources are the Kaggle life-expectancy file, the '510data'
    file (one row per series/country with one column per year) and the
    culture file. They are joined on an upper-cased 'Country Name' column.

    Attributes:
        kaggle_path (str): Path to the Kaggle dataset file.
        data510_path (str): Path to the '510data' dataset file.
        culture_path (str): Path to the culture dataset file.
    """

    def __init__(self, kaggle_path: str, data510_path: str, culture_path: str) -> None:
        """
        Store the dataset locations; no file is read until a method is called.

        Args:
            kaggle_path (str): Path to the Kaggle dataset file.
            data510_path (str): Path to the '510data' dataset file.
            culture_path (str): Path to the culture dataset file.
        """
        self.kaggle_path = kaggle_path
        self.data510_path = data510_path
        self.culture_path = culture_path

    def load_and_clean_kaggle_data(self) -> pd.DataFrame:
        """
        Load the Kaggle dataset and reduce it to one averaged row per country.

        Rows without a 'Life expectancy ' value are dropped, remaining NaNs in
        numeric columns are replaced by that column's mean, 'Year' and
        'Status' are removed, and the rest is averaged per 'Country'.

        Returns:
            DataFrame: One row per country with a 'Country Name' column
            (upper-cased) followed by the per-country column means.
        """
        kaggle = pd.read_csv(self.kaggle_path)

        # Note: the column name in the file really has a trailing space.
        # .copy() gives an independent frame so the fill below is applied to
        # the data itself rather than to a temporary selection.
        kaggle_dropped = kaggle.dropna(subset=['Life expectancy ']).copy()

        numeric_columns = kaggle_dropped.select_dtypes(include=['number']).columns
        # Assign the filled values back instead of using a chained
        # `fillna(inplace=True)`, which is a no-op under copy-on-write.
        kaggle_dropped[numeric_columns] = kaggle_dropped[numeric_columns].fillna(
            kaggle_dropped[numeric_columns].mean()
        )

        kaggle_dropped_num = kaggle_dropped.drop(columns=['Year', 'Status'])
        # numeric_only guards against any other text column in the file.
        kaggle_average = (
            kaggle_dropped_num.groupby('Country').mean(numeric_only=True).reset_index()
        )
        kaggle_average = kaggle_average.rename(columns={'Country': 'Country Name'})
        kaggle_average['Country Name'] = kaggle_average['Country Name'].str.upper()
        return kaggle_average

    def process_510_data(self) -> pd.DataFrame:
        """
        Turn the '510data' file into a country-by-series table of 2000-2015 means.

        Column headers are cut at the first '[' and stripped, the year columns
        '2000'..'2015' are coerced to numbers (unparseable cells become NaN),
        and each row is summarised by the mean of its available years.

        Returns:
            DataFrame: One row per 'Country Name' and one column per
            'Series Name', holding the mean value for that pair.
        """
        df = pd.read_csv(self.data510_path)
        df.columns = [col.split('[')[0].strip() for col in df.columns]
        df = df.drop(columns=['Series Code', 'Country Code'])

        years_columns = [str(year) for year in range(2000, 2016)]
        yearly = df[years_columns].apply(pd.to_numeric, errors='coerce')

        # Filling a row's gaps with that row's own mean does not change the
        # row mean, so the mean of the available years is computed directly
        # in one vectorized pass instead of filling cell by cell.
        row_means = yearly.mean(axis=1)

        # Rows with no data at all take the mean of the earliest year column
        # that has any data, matching the previous cell-by-cell fill.
        # NOTE(review): that mean is taken across every series in the file,
        # so it mixes unrelated indicators - confirm this is intended.
        defined_column_means = yearly.mean().dropna()
        if not defined_column_means.empty:
            row_means = row_means.fillna(defined_column_means.iloc[0])

        summary = pd.DataFrame({
            'Series Name': df['Series Name'],
            'Country Name': df['Country Name'],
            'Mean': row_means,
        })
        agg_df = summary.groupby(['Country Name', 'Series Name']).mean().reset_index()
        pivoted_df = agg_df.pivot(index='Country Name', columns='Series Name', values='Mean')
        pivoted_df.columns.name = None
        return pivoted_df.reset_index()

    def merge_data(self, kaggle_average: pd.DataFrame,
                   data510_processed: pd.DataFrame) -> pd.DataFrame:
        """
        Left-join the culture and Kaggle data onto the '510data' table.

        The first column of the culture file is treated as the country name.
        Country names are upper-cased before joining. After the joins, NaNs in
        numeric columns are replaced by that column's mean. Neither input
        DataFrame is modified.

        Args:
            kaggle_average (DataFrame): Processed Kaggle dataset with average values.
            data510_processed (DataFrame): Processed '510data' dataset.

        Returns:
            DataFrame: Final merged DataFrame with numeric NaN values filled.
        """
        culture = pd.read_csv(self.culture_path)
        # Rename through the public API; writing into `columns.values`
        # mutates an Index that pandas treats as immutable.
        culture = culture.rename(columns={culture.columns[0]: 'Country Name'})
        culture['Country Name'] = culture['Country Name'].str.upper()

        # Work on a copy so the caller's frame keeps its original names.
        data510 = data510_processed.copy()
        data510['Country Name'] = data510['Country Name'].str.upper()

        result = data510.merge(culture, how='left', on='Country Name')
        final = result.merge(kaggle_average, how='left', on='Country Name')

        mean_values = final.select_dtypes(include=['number']).mean()
        return final.fillna(mean_values)

# Usage example
# Point the processor at the three source CSV files.
processor = DataProcessor(
    kaggle_path='/Users/shuai/Desktop/Life Expectancy Data.csv',
    data510_path='/Users/shuai/Desktop/510data.csv',
    culture_path='/Users/shuai/Desktop/culture_df.csv',
)

# Run each stage in order: Kaggle averages, '510data' pivot, then the merge.
kaggle_average = processor.load_and_clean_kaggle_data()
data510_processed = processor.process_510_data()
final_clean = processor.merge_data(
    kaggle_average=kaggle_average,
    data510_processed=data510_processed,
)

# Write the merged table to the working directory without the row index.
final_clean.to_csv('510df_final.csv', index=False)

