In [7]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler

# Load the raw data.
file_path = 'unclean-data.csv'
data = pd.read_csv(file_path)

# One-hot encode the categorical columns.
categorical_columns = ['Country Name', 'Country Code_x']
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4. Try the current
# spelling first and fall back so the cell also runs on older installs.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded_categories = encoder.fit_transform(data[categorical_columns])

# Wrap the encoded array in a DataFrame that shares `data`'s row index.
# pd.concat(axis=1) aligns on index labels, so reusing the index keeps the
# encoded rows attached to the rows they were computed from.
encoded_df = pd.DataFrame(
    encoded_categories,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=data.index,
)

# Replace the original categorical columns with their one-hot encoding.
data = data.drop(categorical_columns, axis=1)
data = pd.concat([data, encoded_df], axis=1)

# NOTE(review): both scalers below are fitted on every column of `data`,
# including the one-hot indicator columns - confirm that is intended.

# Normalization (min-max scaling of every column).
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

# Standardization (zero mean / unit variance for every column).
standardizer = StandardScaler()
standardized_data = standardizer.fit_transform(data)

# Convert the numpy arrays back to DataFrames, restoring the column labels.
normalized_df = pd.DataFrame(normalized_data, columns=data.columns)
standardized_df = pd.DataFrame(standardized_data, columns=data.columns)

# Save the transformed data to new CSV files (row index is not written).
normalized_df.to_csv('input-nrm.csv', index=False)
standardized_df.to_csv('input-std.csv', index=False)

# Display the first few rows of each DataFrame.
print("Encoded and Normalized Data:")
print(normalized_df.head())

print("\nEncoded and Standardized Data:")
print(standardized_df.head())


Encoded and Normalized Data:
       MPA  Share of global plastics emitted to ocean  FisheryConsumption  \
0  0.00005                                   0.002413            0.004853   
1  0.02839                                   0.004391            0.000171   
2  0.11270                                   0.000039            0.000902   
3  0.11768                                   0.011609            0.010023   
4  0.00182                                   0.000006            0.000028   

   Country Name_Albania  Country Name_Algeria  Country Name_Angola  \
0                   0.0                   0.0                  1.0   
1                   1.0                   0.0                  0.0   
2                   0.0                   0.0                  0.0   
3                   0.0                   0.0                  0.0   
4                   0.0                   0.0                  0.0   

   Country Name_Antigua and Barbuda  Country Name_Argentina  \
0                       

