In [33]:
import pandas as pd

# Load the prepared dataset into a DataFrame.
# The path is relative, so it resolves against the current working directory.
file_path = '../preped.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Print the column labels of the loaded DataFrame to see which fields are available.
print(df.columns)


In [None]:
# Map a minimum-age value onto a coarse audience label
def age_group(age):
    """Return the audience bucket for a minimum-age value.

    Args:
        age: Minimum age to classify (any value comparable with numbers).

    Returns:
        'All' when ``age`` equals 0, 'Teen' when ``age`` lies in the closed
        range 1..16, and 'Adult' for everything else.
    """
    # Zero means no age restriction at all.
    if age == 0:
        return 'All'
    # Anything outside 1..16 ends up here as 'Adult' -- that includes larger
    # ages but also negatives, values strictly between 0 and 1, and NaN
    # (every comparison against NaN is False).
    return 'Teen' if 1 <= age <= 16 else 'Adult'
# Label every row by passing its 'Minimum Age' value through age_group,
# and store the result as a new 'Age Group' column on df.
df['Age Group'] = df['Minimum Age'].map(age_group)

# Bare trailing expression so the notebook displays the new column.
df['Age Group']

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score


# Feature matrix: every int/float column except the raw age and the label
# derived from it (keeping either would hand the model the answer).
features = df.drop(columns=['Minimum Age', 'Age Group']).select_dtypes(include=[int, float])

X = features
y = df['Age Group']

# Split data into training and testing sets BEFORE scaling, so the held-out
# rows never influence the scaler's statistics (avoids train/test leakage).
# random_state pins the shuffle so the split is reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42  # Adjust test size as needed
)

# Feature scaling (essential for KNN, which is distance-based): learn
# mean/std from the training rows only, then reuse them for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole matrix on the training-fitted scale; the name X_scaled is kept
# defined in case other cells still reference it.
X_scaled = scaler.transform(X)

# Train the KNN model:
knn = KNeighborsClassifier(n_neighbors=5)  # Choose an appropriate value for k (n_neighbors)
knn.fit(X_train, y_train)

# Make predictions on the test set:
y_pred = knn.predict(X_test)

# Evaluate the model: per-class precision/recall/F1, then overall accuracy.
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
import pandas as pd
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Assuming 'df' is your DataFrame (with the 'Age Group' label column already added).
# Feature matrix: every int/float column except the raw age and the label
# derived from it.
features = df.drop(columns=['Minimum Age', 'Age Group']).select_dtypes(include=[int, float])

X = features
y = df['Age Group']

# Split data into training and testing sets BEFORE scaling, so the held-out
# rows never influence the scaler's statistics (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42  # Adjust test size as needed
)

# Feature scaling (essential for KNN): fit on the training rows only.
# NOTE(review): the scaler is still fitted once on the whole training split,
# so the CV folds inside the search share its statistics; wrapping
# scaler + KNN in a Pipeline would make the cross-validation fully leak-free.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole matrix on the training-fitted scale; the name X_scaled is kept
# defined in case other cells still reference it.
X_scaled = scaler.transform(X)

# Search-space settings shared by the grid and the halving schedule.
cv_folds = 5
max_neighbors = 29

# Define the parameter grid for HalvingGridSearchCV
param_grid = {
    'n_neighbors': range(1, max_neighbors + 1),  # Explore a range of n_neighbors values
    'weights': ['uniform', 'distance'],  # Explore different weighting strategies
    # 'minkowski' is left out: with KNeighborsClassifier's default p=2 it is
    # the same distance as 'euclidean' and only duplicated every candidate.
    'metric': ['euclidean', 'manhattan'],  # Explore different distance metrics
}

# The first halving round must see enough rows for the largest k to be
# scoreable: each CV training fold gets (cv_folds - 1) / cv_folds of the
# round's rows, and KNN cannot look up more neighbours than it was fitted on.
# Starting at 10 rows left ~8 per training fold, so larger k values failed
# to score and were discarded in the first round regardless of their quality.
# Capped at the training-set size so small datasets do not raise.
min_resources = min(len(X_train), cv_folds * max_neighbors)

# Initialize HalvingGridSearchCV
halving_cv = HalvingGridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=cv_folds,  # Number of cross-validation folds
    scoring='accuracy',  # Scoring metric
    n_jobs=-1,  # Use all available cores for parallel processing
    verbose=1,  # Increase verbosity for more detailed output
    factor=2,  # Reduction factor: keep roughly half the candidates each iteration
    min_resources=min_resources,  # Rows used in the first iteration
    random_state=42,  # Seed the per-iteration row subsampling so reruns match
)

# Train the model using HalvingGridSearchCV
halving_cv.fit(X_train, y_train)

# Get the best estimator from HalvingGridSearchCV (refitted on all of X_train)
best_knn = halving_cv.best_estimator_

# Make predictions on the test set using the best model
y_pred = best_knn.predict(X_test)


In [None]:

# Report the tuned model: the winning hyperparameters first, then the
# per-class metrics and the overall accuracy on the held-out test split.
best_params = halving_cv.best_params_
print("Best Hyperparameters:", best_params)

report = classification_report(y_test, y_pred)
print(report)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)