In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Cross-cutting scikit-learn utilities (data splitting, preprocessing, metrics, pipelines, imputation)
from sklearn.model_selection import train_test_split, cross_val_score, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer


# Classification Model Libraries (common and versatile)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier


# Regression Model Libraries (common and versatile)
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Data loading: read the population and service CSVs into DataFrames.
# A failed load is reported on stdout instead of being raised, so
# df_population / df_service may be left undefined after this block.
population_data_path = "data/ke_fp_population_data.csv"
service_data_path = "data/ke_fp_service_data.csv"

try:
    # Both files are decoded as latin1.
    df_population = pd.read_csv(population_data_path, encoding="latin1")
    df_service = pd.read_csv(service_data_path, encoding="latin1")

    # Report the shape of each dataset once both reads have succeeded.
    print(
        "Datasets loaded successfully:",
        f"ke_fp_population_data.csv shape: {df_population.shape}",
        f"ke_fp_service_data.csv shape: {df_service.shape}",
        sep="\n",
    )
except FileNotFoundError as missing_file:
    # At least one path did not resolve; explain the expected layout and
    # show the underlying error.
    print(
        "Error: One or both of the CSV files were not found.",
        "Please ensure 'ke_fp_population_data.csv' and 'ke_fp_service_data.csv' are in a folder named 'data' in the same directory as this script.",
        missing_file,
        sep="\n",
    )
except Exception as load_error:
    # Any other failure (parsing, decoding, permissions, ...) is reported
    # here rather than propagated.
    print(f"An unexpected error occurred while loading the datasets: {load_error}")





Datasets loaded successfully:
ke_fp_population_data.csv shape: (47, 38)
ke_fp_service_data.csv shape: (1128, 60)
