# Data Loading and Preprocessing Function

In [2]:
# Step 1: Load the data
# pandas is imported in this cell so that it runs on a fresh kernel
# (Restart & Run All) without relying on a later cell having been executed.
import pandas as pd

df = pd.read_csv('/content/population.csv')
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was saved into the CSV. If it is not a real feature,
# consider reading with `index_col=0` (TODO confirm against the data source).
df.head()

Unnamed: 0,City,Country,Continent,Population (2024),Population (2023),Growth Rate
0,Tokyo,Japan,Asia,37115035,37194105,-0.0021
1,Delhi,India,Asia,33807403,32941309,0.0263
2,Shanghai,China,Asia,29867918,29210808,0.0225
3,Dhaka,Bangladesh,Asia,23935652,23209616,0.0313
4,Sao Paulo,Brazil,South America,22806704,22619736,0.0083


In [17]:
# Separate the prediction target from the remaining feature columns.
y = df['Growth Rate']
X = df.drop(columns='Growth Rate')

In [18]:
import pandas as pd

def preprocess_data(df):
    """
    Clean and preprocess the dataset.

    Steps, in order:
    - Fills missing values in numeric columns with that column's mean.
    - One-hot encodes categorical columns (the first level of each is dropped).
    - Standardizes the originally numeric columns to zero mean and unit
      (sample) standard deviation.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is not modified; a new frame is returned.

    Returns
    -------
    pandas.DataFrame
        The imputed, encoded and scaled version of ``df``.
    """
    # Pick the numeric columns before encoding so that only genuine numeric
    # features are scaled, never the dummy indicator columns.
    numeric_cols = df.select_dtypes(include='number').columns

    # Handle missing values. numeric_only=True keeps text columns out of the
    # mean computation; without it, recent pandas raises TypeError on frames
    # that mix numeric and string columns.
    df = df.fillna(df.mean(numeric_only=True))

    # Encode categorical columns
    df = pd.get_dummies(df, drop_first=True)

    # Scale numerical data
    for col in numeric_cols:
        centered = df[col] - df[col].mean()
        spread = df[col].std()
        # A constant column (std == 0) or a single-row frame (std is NaN)
        # would turn the division into NaN/inf; such columns are only centered.
        if pd.isna(spread) or spread == 0:
            spread = 1.0
        df[col] = centered / spread

    return df


# Model Training—Turning Data into Predictions

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

def train_model(X, y, test_size=0.2, random_state=42):
    """
    Train a machine learning model on the data.

    The data is split into train and test partitions, a random forest
    classifier is fitted on the training partition, and the held-out
    partition is returned so the caller can evaluate the model.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix.
    y : array-like or Series
        Target labels.
    test_size : float, default 0.2
        Fraction of the rows held out for testing.
    random_state : int or None, default 42
        Seed used for both the split and the forest, so repeated runs give
        the same model. Pass None for non-deterministic behavior.

    Returns
    -------
    tuple
        ``(model, X_test, y_test)``: the fitted classifier and the held-out
        features and labels.

    Notes
    -----
    NOTE(review): a classifier expects discrete class labels. If ``y`` is a
    continuous quantity, a regressor is presumably the right estimator;
    confirm against the intended target.
    """
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Train a Random Forest Classifier. Seeding the forest as well as the
    # split makes the fitted model reproducible across runs.
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    return model, X_test, y_test


# Model Evaluation—Grading the Model’s Performance

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix

def evaluate_model(model, X_test, y_test):
    """
    Score a fitted model on held-out data and print the results.

    Predictions for ``X_test`` are compared against ``y_test``; the accuracy
    and the confusion matrix are written to standard output. Nothing is
    returned.
    """
    y_pred = model.predict(X_test)

    # Compute both metrics up front, then report them.
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc}")
    print(f"Confusion Matrix:\n{cm}")