In [10]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Download the Concrete Compressive Strength dataset (UCI repository id 165).
concrete = fetch_ucirepo(id=165)

# Unpack the feature table and the target table from the fetched bundle.
X, y = concrete.data.features, concrete.data.targets

# Perform EDA
# Preview the first rows with the features and the target side by side.
print("First five records:")
print(pd.concat([X, y], axis=1).head())

# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called as its own statement. Passing it to print() would emit the
# report *before* the label and then print "Dataset Info: None".
print("\nDataset Info:")
X.info()
print("\nTarget Info:", y.describe())
print("\nFeature names:", X.columns.tolist())
# The null counts must be an argument of print(); written after the closing
# parenthesis they only form a discarded tuple and are never displayed.
print("Features missing values:", X.isnull().sum())
print("\nTargets missing values:", y.isnull().sum())

# Descriptive statistics for every feature column.
print("\n=== STATISTICAL SUMMARY - FEATURES ===")
print(X.describe())

# Descriptive statistics for the target column(s).
print("\n=== STATISTICAL SUMMARY - TARGET ===")
print(y.describe())

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 5-nearest-neighbours regressor on the training partition.
# NOTE(review): no feature-scaling step is visible before this fit, so the
# neighbour distances use the raw feature values — confirm that is intended.
knn_regressor = KNeighborsRegressor(n_neighbors=5)
knn_regressor.fit(X_train, y_train)

# Score the fitted model on both partitions and report the results.
train_r2 = knn_regressor.score(X_train, y_train)
test_r2 = knn_regressor.score(X_test, y_test)
print("\nRegressor training R² score:", train_r2)
print("Regressor test R² score:", test_r2)

# Predict on the held-out rows and show the first ten predictions.
y_pred = knn_regressor.predict(X_test)
print("\nPredictions (Regressor):", y_pred[:10])


First five records:
   Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0   540.0                 0.0      0.0  162.0               2.5   
1   540.0                 0.0      0.0  162.0               2.5   
2   332.5               142.5      0.0  228.0               0.0   
3   332.5               142.5      0.0  228.0               0.0   
4   198.6               132.4      0.0  192.0               0.0   

   Coarse Aggregate  Fine Aggregate  Age  Concrete compressive strength  
0            1040.0           676.0   28                          79.99  
1            1055.0           676.0   28                          61.89  
2             932.0           594.0  270                          40.27  
3             932.0           594.0  365                          41.05  
4             978.4           825.5  360                          44.30  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 8 columns):
 #   Column              No