In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv("pitfall_prediction_dataset.csv")
df.head()

Unnamed: 0,TerrainType,Weather,RockDensity,SurfaceRoughness,MoisturePct,SlopeDeg,TrafficLoad,DrainageQuality,DepthCm,SoilHardness,VibrationLevel,RiskScore,RiskLevel,PitfallPresent
0,Rocky,Rainy,Medium,0.17,86.92,10.94,High,Moderate,192.8,1.41,2.59,5.88,Medium,0
1,Asphalt,Foggy,Low,6.38,68.44,10.02,High,Good,9.5,8.7,2.35,2.2,Low,1
2,Clay,Snowy,Low,7.42,20.18,36.27,Low,Poor,161.8,1.69,4.43,7.67,Low,1
3,Asphalt,Foggy,Low,9.19,60.96,3.11,Medium,Poor,143.8,1.25,2.47,0.44,Medium,0
4,Asphalt,Foggy,High,8.85,9.09,28.71,Low,Good,79.5,1.04,0.89,3.44,Low,1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   TerrainType       10000 non-null  object 
 1   Weather           10000 non-null  object 
 2   RockDensity       10000 non-null  object 
 3   SurfaceRoughness  10000 non-null  float64
 4   MoisturePct       10000 non-null  float64
 5   SlopeDeg          10000 non-null  float64
 6   TrafficLoad       10000 non-null  object 
 7   DrainageQuality   10000 non-null  object 
 8   DepthCm           10000 non-null  float64
 9   SoilHardness      10000 non-null  float64
 10  VibrationLevel    10000 non-null  float64
 11  RiskScore         10000 non-null  float64
 12  RiskLevel         10000 non-null  object 
 13  PitfallPresent    10000 non-null  int64  
dtypes: float64(7), int64(1), object(6)
memory usage: 1.1+ MB


In [5]:
df.describe()

Unnamed: 0,SurfaceRoughness,MoisturePct,SlopeDeg,DepthCm,SoilHardness,VibrationLevel,RiskScore,PitfallPresent
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5.015884,49.925312,22.430923,99.55084,4.998921,2.497781,5.022395,0.4984
std,2.839075,28.892846,12.98696,58.029445,2.589613,1.436276,2.881183,0.500022
min,0.1,0.0,0.0,0.0,0.5,0.0,0.0,0.0
25%,2.55,25.0175,11.08,49.3,2.78,1.25,2.5,0.0
50%,5.02,50.025,22.455,99.0,4.975,2.51,5.065,0.0
75%,7.44,74.83,33.7125,150.4,7.22,3.73,7.48,1.0
max,10.0,99.99,45.0,200.0,9.5,5.0,10.0,1.0


In [6]:
df.isnull().sum()

TerrainType         0
Weather             0
RockDensity         0
SurfaceRoughness    0
MoisturePct         0
SlopeDeg            0
TrafficLoad         0
DrainageQuality     0
DepthCm             0
SoilHardness        0
VibrationLevel      0
RiskScore           0
RiskLevel           0
PitfallPresent      0
dtype: int64

In [7]:
df.drop(columns=["RiskScore"],inplace=True)

In [8]:
df.drop(columns=["RiskLevel"],inplace=True)

In [9]:
from sklearn.preprocessing import LabelEncoder

categorical_cols = ['TerrainType','Weather','RockDensity','TrafficLoad','DrainageQuality']
label_encoder = LabelEncoder()

for col in categorical_cols:
    df[col] = label_encoder.fit_transform(df[col])


In [10]:
df.head()

Unnamed: 0,TerrainType,Weather,RockDensity,SurfaceRoughness,MoisturePct,SlopeDeg,TrafficLoad,DrainageQuality,DepthCm,SoilHardness,VibrationLevel,PitfallPresent
0,3,2,2,0.17,86.92,10.94,0,1,192.8,1.41,2.59,0
1,0,1,1,6.38,68.44,10.02,0,0,9.5,8.7,2.35,1
2,1,3,1,7.42,20.18,36.27,1,2,161.8,1.69,4.43,1
3,0,1,1,9.19,60.96,3.11,2,2,143.8,1.25,2.47,0
4,0,1,0,8.85,9.09,28.71,1,0,79.5,1.04,0.89,1


In [11]:
# Step 3: Seperate x and y (features and target variable)
from sklearn.model_selection import train_test_split
x = df.drop(columns=["PitfallPresent"])  # Features
y = df["PitfallPresent"]  # Target variable

In [12]:
x

Unnamed: 0,TerrainType,Weather,RockDensity,SurfaceRoughness,MoisturePct,SlopeDeg,TrafficLoad,DrainageQuality,DepthCm,SoilHardness,VibrationLevel
0,3,2,2,0.17,86.92,10.94,0,1,192.8,1.41,2.59
1,0,1,1,6.38,68.44,10.02,0,0,9.5,8.70,2.35
2,1,3,1,7.42,20.18,36.27,1,2,161.8,1.69,4.43
3,0,1,1,9.19,60.96,3.11,2,2,143.8,1.25,2.47
4,0,1,0,8.85,9.09,28.71,1,0,79.5,1.04,0.89
...,...,...,...,...,...,...,...,...,...,...,...
9995,4,1,0,7.90,57.01,24.82,1,1,17.9,7.70,4.14
9996,1,1,2,8.50,18.99,26.33,1,0,178.1,8.03,2.12
9997,4,2,1,1.11,97.46,3.13,1,0,8.2,2.91,0.56
9998,3,2,1,9.25,88.53,13.61,1,1,28.6,3.42,3.82


In [13]:
y

0       0
1       1
2       1
3       0
4       1
       ..
9995    1
9996    1
9997    0
9998    1
9999    0
Name: PitfallPresent, Length: 10000, dtype: int64

In [14]:
# Step 4: Split data into training and testing sets (80% train, 20% test)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [15]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# 1️⃣ Encode categorical columns
categorical_cols = ['TerrainType','Weather','RockDensity','TrafficLoad','DrainageQuality']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    xtrain[col] = le.fit_transform(xtrain[col])
    xtest[col] = le.transform(xtest[col])
    label_encoders[col] = le  # Save encoder for later

# 2️⃣ Scale numeric features
scaler = StandardScaler()
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)


In [16]:
xtrain

Unnamed: 0,TerrainType,Weather,RockDensity,SurfaceRoughness,MoisturePct,SlopeDeg,TrafficLoad,DrainageQuality,DepthCm,SoilHardness,VibrationLevel
9254,4,0,1,0.59,32.86,7.13,0,0,67.6,4.65,4.46
1561,4,4,1,6.63,46.52,42.67,2,0,65.9,7.03,1.88
1670,1,3,1,7.28,73.72,6.20,1,1,108.4,9.21,4.02
6087,3,4,1,4.77,14.04,7.49,1,0,104.1,3.08,4.27
6669,1,0,0,4.30,74.49,34.41,1,0,99.9,3.17,4.99
...,...,...,...,...,...,...,...,...,...,...,...
5734,0,3,1,6.65,70.43,5.79,2,1,3.0,5.98,3.38
5191,3,2,0,0.38,58.16,43.28,0,2,15.6,2.13,1.52
5390,3,3,2,0.37,53.45,9.05,0,1,58.8,5.70,1.74
860,2,3,1,4.66,51.41,1.28,0,0,48.0,2.97,2.77


In [17]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(xtrain, ytrain)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
ypred = model.predict(xtest)

In [19]:

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("pitfall_prediction_dataset.csv")

# Encode categorical columns
categorical_cols = ['TerrainType', 'Weather', 'RockDensity', 
                    'TrafficLoad', 'DrainageQuality']
label_encoder = LabelEncoder()
for col in categorical_cols:
    df[col] = label_encoder.fit_transform(df[col])

# Select features and target
X = df[['TerrainType', 'Weather', 'RockDensity', 'SurfaceRoughness', 
        'MoisturePct', 'SlopeDeg', 'TrafficLoad', 'DrainageQuality', 
        'DepthCm', 'SoilHardness', 'VibrationLevel']]  # 11 features

y = df['PitfallPresent']   # target column (0 = no pitfall, 1 = pitfall)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("Predictions on test set:", y_pred[:5])

# Example: user input
user_data = [[0, 0, 0, 5, 10.5, 30.0, 1, 1, 50, 2.5, 3]]  # 11 features
user_prediction = model.predict(user_data)
print(f"Predicted Risk Score: {user_prediction[0]:.2f}")


Predictions on test set: [0 0 1 1 1]
Predicted Risk Score: 0.00




In [None]:
%pip install gradio
%pip install flask
%pip install joblib


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\Kasturi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\Kasturi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\Kasturi\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [20]:
import joblib

# Save the trained model
joblib.dump(model, "pitfall_model.pkl")


['pitfall_model.pkl']