<a href="https://colab.research.google.com/github/05022006/Freelancer-Cost-Prediction-System/blob/main/Untitled18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import random
import datetime

# Synthetic delivery-worker dataset.
# The value ranges for each column are based on the sample data.
num_rows = 10000

# Use a dedicated, seeded generator so that Restart & Run All always writes the
# same CSV (drawing from the unseeded global `random` state gave a different
# dataset, and therefore different metrics, on every run).
SEED = 42
rng = random.Random(SEED)

# Generate data
worker_ids = range(1, num_rows + 1)
# All timestamps fall on 2023-11-01, hour 7..19 inclusive, seconds fixed at 0.
timestamps = [datetime.datetime(2023, 11, 1, rng.randint(7, 19), rng.randint(0, 59), 0) for _ in range(num_rows)]
deliveries_per_hour = [rng.randint(2, 5) for _ in range(num_rows)]
earnings_per_delivery = [rng.choice([90, 95, 100, 105, 110, 115, 120, 130]) for _ in range(num_rows)]
ratings = [round(rng.uniform(4.1, 5.0), 1) for _ in range(num_rows)]
distance_per_delivery = [round(rng.uniform(2.4, 7.1), 1) for _ in range(num_rows)]
experience_level = [rng.randint(1, 3) for _ in range(num_rows)]
locations = [rng.choice(['Downtown', 'Suburbs', 'Industrial Area', 'Residential Area']) for _ in range(num_rows)]
time_per_delivery = [round(rng.uniform(0.5, 0.9), 2) for _ in range(num_rows)]

# Create the DataFrame
data = {
    "Worker ID": worker_ids,
    "Timestamp": timestamps,
    "Deliveries Per Hour": deliveries_per_hour,
    "Earning Per Delivery": earnings_per_delivery,
    "Rating": ratings,
    "Distance Per Delivery (km)": distance_per_delivery,
    "Experience Level": experience_level,
    "Location": locations,
    "Time Per Delivery (hours)": time_per_delivery
}
df = pd.DataFrame(data)

# Save to CSV
df.to_csv("delivery_dataset.csv", index=False)
# The row count is formatted from num_rows so the message cannot drift from the data.
print(f"Dataset with {num_rows:,} rows has been generated and saved as 'delivery_dataset.csv'.")


Dataset with 10,000 rows has been generated and saved as 'delivery_dataset.csv'.


Load the Dataset

In [None]:
import pandas as pd

# Read the CSV written by the generation cell back into a DataFrame.
df = pd.read_csv(filepath_or_buffer="delivery_dataset.csv")

# Preview the first five rows as plain text.
print(df.head(n=5))


   Worker ID            Timestamp  Deliveries Per Hour  Earning Per Delivery  \
0          1  2023-11-01 07:37:00                    5                   105   
1          2  2023-11-01 15:57:00                    4                   115   
2          3  2023-11-01 10:04:00                    3                   120   
3          4  2023-11-01 18:35:00                    4                   105   
4          5  2023-11-01 13:37:00                    3                   100   

   Rating  Distance Per Delivery (km)  Experience Level         Location  \
0     4.8                         5.2                 1         Downtown   
1     4.4                         5.2                 3  Industrial Area   
2     4.4                         5.7                 2  Industrial Area   
3     4.3                         3.8                 2          Suburbs   
4     4.5                         4.6                 3         Downtown   

   Time Per Delivery (hours)  
0                       0.51  


Data Exploration: missing values

In [None]:
# Count missing entries per column (isna is the same check as isnull).
print(df.isna().sum())


Worker ID                     0
Timestamp                     0
Deliveries Per Hour           0
Earning Per Delivery          0
Rating                        0
Distance Per Delivery (km)    0
Experience Level              0
Location                      0
Time Per Delivery (hours)     0
dtype: int64


Summarize the data

In [None]:
# Summary statistics for the numeric columns, with the default quartiles spelled out.
print(df.describe(percentiles=[0.25, 0.5, 0.75]))


         Worker ID  Deliveries Per Hour  Earning Per Delivery        Rating  \
count  10000.00000         10000.000000          10000.000000  10000.000000   
mean    5000.50000             3.488200            108.085500      4.550400   
std     2886.89568             1.115968             12.483901      0.263057   
min        1.00000             2.000000             90.000000      4.100000   
25%     2500.75000             2.000000             98.750000      4.300000   
50%     5000.50000             3.000000            105.000000      4.600000   
75%     7500.25000             4.000000            115.000000      4.800000   
max    10000.00000             5.000000            130.000000      5.000000   

       Distance Per Delivery (km)  Experience Level  Time Per Delivery (hours)  
count                10000.000000       10000.00000               10000.000000  
mean                     4.760740           2.01720                   0.699745  
std                      1.361825           0

Unique values: row counts per Location

In [None]:
# Number of rows for each distinct location.
print(df.loc[:, "Location"].value_counts())


Location
Residential Area    2551
Suburbs             2503
Downtown            2481
Industrial Area     2465
Name: count, dtype: int64


Data Preprocessing

In [None]:
# One-hot encode "Location"; drop_first=True omits the first category's
# indicator column, which is implied when all remaining indicators are false.
df = pd.get_dummies(
    data=df,
    columns=["Location"],
    drop_first=True,
)


Feature scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the numeric feature columns (zero mean, unit variance).
# "Earning Per Delivery" is deliberately NOT scaled: it is the regression
# target, and scaling it made the model predict z-scores rather than earnings.
# NOTE(review): the scaler is still fitted on the whole frame before the
# train/test split, so test-set statistics leak into the features. Fitting on
# the training split only would need this cell to move after the split.
scaler = StandardScaler()
numerical_columns = ["Deliveries Per Hour", "Rating", "Distance Per Delivery (km)", "Time Per Delivery (hours)"]
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])


Drop unnecessary columns

In [None]:
# Remove the row identifier and the timestamp; neither is used as a model input.
df = df.drop(["Worker ID", "Timestamp"], axis="columns")


Define features and target variable

In [None]:
# Features: every remaining column except the target.
X = df.drop("Earning Per Delivery", axis="columns")
# Target: earnings per delivery.
y = df.loc[:, "Earning Per Delivery"]


Split the Data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)


Train the ML model (Random Forest)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor with default hyperparameters and a fixed seed.
model = RandomForestRegressor(random_state=42)
# Fit on the training split only.
model.fit(X=X_train, y=y_train)


Evaluate the model

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred))
print("R^2 Score:", r2_score(y_true=y_test, y_pred=y_pred))


MSE: 1.110936528116975
R^2 Score: -0.12091760961361375


Hyperparameter tuning (grid search)

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid: 3 forest sizes x 4 depth limits = 12 candidates.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
)

# Each candidate is scored with 3-fold cross-validation on the training split.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=3,
)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'max_depth': 10, 'n_estimators': 200}


Save the Model

In [None]:
import joblib

# Persist the fitted estimator to disk.
# NOTE(review): this saves `model` (default hyperparameters), not the tuned
# `grid_search.best_estimator_` - confirm that is intended.
joblib.dump(value=model, filename="delivery_model.pkl")
print("Model saved as 'delivery_model.pkl'.")


Model saved as 'delivery_model.pkl'.


Deployment

In [None]:
# Reload the persisted estimator. joblib files are pickle-based, so only load
# files you wrote yourself or otherwise trust.
loaded_model = joblib.load(filename="delivery_model.pkl")
# Example input: the first row of the test set, kept as a one-row DataFrame.
new_data = X_test.head(1)
prediction = loaded_model.predict(new_data)
print("Prediction:", prediction)


Prediction: [-0.24717069]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load Dataset
# Relative path, matching where the generation cell writes the file. The
# previous absolute "/content/..." path only resolved on Google Colab.
df = pd.read_csv("delivery_dataset.csv")

# Step 2: Create Hourly Wages as Target Variable
df['Hourly Wage'] = df['Deliveries Per Hour'] * df['Earning Per Delivery']

# Step 3: Select Features and Target
# NOTE(review): 'Deliveries Per Hour' is both a feature and a direct factor of
# the target computed above, so a high R^2 here largely reflects that identity.
X = df[['Deliveries Per Hour', 'Rating', 'Distance Per Delivery (km)', 'Experience Level', 'Time Per Delivery (hours)']]
y = df['Hourly Wage']

# Step 4: Split Data (80% train / 20% test, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train Regression Model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Step 6: Evaluate Model on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)
print("R^2 Score:", r2)

# Step 7: User Input for Prediction
def predict_hourly_wage():
    """Prompt for one worker's delivery stats and print the predicted hourly wage.

    Reads five answers from standard input, builds a one-row DataFrame whose
    columns match the features the model was trained on, and prints the
    prediction made by the module-level ``model``.

    Time per delivery is entered in minutes and converted to hours here,
    because the feature column is "Time Per Delivery (hours)". Passing the
    minutes straight through fed the model a value in the wrong unit.

    Raises:
        ValueError: if an answer cannot be parsed as a number.
    """
    print("\nEnter the following details to predict hourly wage:")
    deliveries_per_hour = float(input("Deliveries Per Hour: "))
    rating = float(input("Rating: "))
    distance_per_delivery = float(input("Distance Per Delivery (km): "))
    # The dataset generator draws experience levels with random.randint(1, 3),
    # so the prompt advertises that range rather than 1-5.
    experience_level = int(input("Experience Level (1-3): "))
    time_per_delivery_mins = float(input("Time Per Delivery (mins): "))

    # Convert to the unit of the training feature.
    time_per_delivery = time_per_delivery_mins / 60.0

    # Prepare input
    user_data = pd.DataFrame([[
        deliveries_per_hour,
        rating,
        distance_per_delivery,
        experience_level,
        time_per_delivery
    ]], columns=['Deliveries Per Hour', 'Rating', 'Distance Per Delivery (km)',
                 'Experience Level', 'Time Per Delivery (hours)'])

    # Predict hourly wage
    hourly_wage = model.predict(user_data)
    print(f"\nPredicted Hourly Wage: ${hourly_wage[0]:.2f}")

# Run the interactive prompt once. This blocks on input() until all five
# questions are answered, then prints the predicted hourly wage.
predict_hourly_wage()


Mean Squared Error (MSE): 2413.2904885392077
R^2 Score: 0.852240231125329

Enter the following details to predict hourly wage:
Deliveries Per Hour: 2
Rating: 3
Distance Per Delivery (km): 4
Experience Level (1-5 ): 5
Time Per Delivery (mins): 20

Predicted Hourly Wage: $207.75
