In [None]:
# import necessary modules
import kagglehub # pip install kagglehub[pandas-datasets]
from math import sqrt
from kagglehub import KaggleDatasetAdapter
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, r2_score, mean_squared_error, f1_score

In [34]:
# load iris dataset from kaggle
iris_df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "uciml/iris",
    "Iris.csv",
)

print(iris_df.head())

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [35]:
# load real_estate dataset from kaggle
real_estate_df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "uviiiii/real-estate",
    "Real estate valuation data set.csv",
)

print(real_estate_df.head())

   No  X1 transaction date  X2 house age  \
0   1             2012.917          32.0   
1   2             2012.917          19.5   
2   3             2013.583          13.3   
3   4             2013.500          13.3   
4   5             2012.833           5.0   

   X3 distance to the nearest MRT station  X4 number of convenience stores  \
0                                84.87882                               10   
1                               306.59470                                9   
2                               561.98450                                5   
3                               561.98450                                5   
4                               390.56840                                5   

   X5 latitude  X6 longitude  Y house price of unit area  
0     24.98298     121.54024                        37.9  
1     24.98034     121.53951                        42.2  
2     24.98746     121.54391                        47.3  
3     24.98746     121.54391  

In [36]:
from sklearn.ensemble import RandomForestClassifier

X = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y = iris_df['Species']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

mlp = RandomForestClassifier(n_estimators=40, random_state=42)

mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

precision = recall_score(y_test, y_pred, average="macro")

print(f"Precision: {precision:.2f}")

Precision: 0.98


In [37]:
from sklearn.ensemble import RandomForestRegressor

print("Dataset loaded. Shape:", real_estate_df.shape)
print("Columns:", real_estate_df.columns.tolist())

target_col = 'Y house price of unit area'
X = real_estate_df.select_dtypes(include=['number']).drop(target_col, axis=1, errors='ignore')
y = real_estate_df[target_col]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")

rf = RandomForestRegressor(n_estimators=400, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"{r2 = }")
r2_rounded = round(r2, 2)
print(f"R² Score: {r2_rounded}")

Dataset loaded. Shape: (414, 8)
Columns: ['No', 'X1 transaction date', 'X2 house age', 'X3 distance to the nearest MRT station', 'X4 number of convenience stores', 'X5 latitude', 'X6 longitude', 'Y house price of unit area']
Train size: 331, Test size: 83
r2 = 0.8093038124652978
R² Score: 0.81


In [38]:
X = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y = iris_df['Species']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
)

print(X_train.shape)

(120, 4)


In [39]:
from sklearn.neural_network import MLPRegressor

X = real_estate_df.select_dtypes(include=['number']).drop('Y house price of unit area', axis=1, errors='ignore')  # Numeric features
y = real_estate_df['Y house price of unit area']  # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")


mlp = MLPRegressor(hidden_layer_sizes=(10,), max_iter=10000, random_state=42)
mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

rmse = mean_squared_error(y_test, y_pred)
print(f"{rmse = }")
print(f"{sqrt(rmse) = }")
rmse_rounded = round(rmse, 2)
print(f"Root Mean Squared Error (RMSE): {rmse_rounded}")

Train size: 248, Test size: 166
rmse = 131.84633924226756
sqrt(rmse) = 11.48243611966849
Root Mean Squared Error (RMSE): 131.85


In [40]:
from sklearn.tree import DecisionTreeRegressor

target_col = 'Y house price of unit area'
X = real_estate_df.select_dtypes(include=['number']).drop([target_col, 'No'], axis=1, errors='ignore')
y = real_estate_df[target_col]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

y_pred = dt.predict(X_test)
r2 = r2_score(y_test, y_pred)
r2_rounded = round(r2, 2)
print(f"R² Score: {r2_rounded}")

R² Score: 0.6


In [41]:
from sklearn.neural_network import MLPClassifier

X = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y = iris_df['Species']

# Division (80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(f"Apprentissage : {X_train.shape[0]}, Test : {X_test.shape[0]}")

# Apprentissage du réseau
mlp = MLPClassifier(hidden_layer_sizes=(5), max_iter=10000, random_state=42)
mlp.fit(X_train, y_train)

# Précision
y_pred = mlp.predict(X_test)
f1 = f1_score(y_test, y_pred, average="macro")
print(f"{f1 = }")
f1_rounded = round(f1, 2)
print(f"{f1_rounded = }")

Apprentissage : 120, Test : 30
f1 = 1.0
f1_rounded = 1.0
