<a href="https://colab.research.google.com/github/Kumarkashish511/Data_Generation_using_Modeling_Simulation/blob/main/Data_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install simpy

import simpy
import random
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB


def simulate_system(mean_interarrival, mean_service, customers=120):

    env = simpy.Environment()
    server = simpy.Resource(env, capacity=1)
    waits = []

    def client(env, resource):
        arrival = env.now
        yield env.timeout(random.expovariate(1 / mean_interarrival))

        with resource.request() as req:
            yield req
            waits.append(env.now - arrival)
            yield env.timeout(random.expovariate(1 / mean_service))

    for _ in range(customers):
        env.process(client(env, server))

    env.run()

    return np.mean(waits) if waits else 0


random.seed(42)
np.random.seed(42)

data = []

for _ in range(1000):

    interarrival = np.random.uniform(1.5, 9.5)
    service = np.random.uniform(0.8, 2.2)

    avg_wait = simulate_system(interarrival, service)

    data.append([interarrival, service, avg_wait])

df = pd.DataFrame(data, columns=["Interarrival", "Service", "Avg_Wait"])

threshold = df["Avg_Wait"].median()
df["Class"] = (df["Avg_Wait"] > threshold).astype(int)

X = df[["Interarrival", "Service"]]
y = df["Class"]

print("Class Distribution:\n")
print(y.value_counts())

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42),
    "SVM (RBF)": SVC(kernel="rbf", probability=True, random_state=42),
    "KNN (k=5)": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB()
}

results = []

for name, model in models.items():

    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)

    results.append([name, acc, prec, rec, f1])

results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"]
).sort_values(by="Accuracy", ascending=False)

print("\nClassification Model Comparison:\n")
print(results_df.to_string(index=False))

best_model = results_df.iloc[0]["Model"]
print(f"\nBest Performing Model: {best_model}")

Class Distribution:

Class
1    500
0    500
Name: count, dtype: int64

Classification Model Comparison:

              Model  Accuracy  Precision   Recall  F1 Score
  Gradient Boosting     0.928   0.932203 0.916667  0.924370
      Random Forest     0.920   0.923729 0.908333  0.915966
           AdaBoost     0.920   0.931034 0.900000  0.915254
          SVM (RBF)     0.920   0.909836 0.925000  0.917355
Logistic Regression     0.920   0.923729 0.908333  0.915966
        Naive Bayes     0.916   0.923077 0.900000  0.911392
      Decision Tree     0.912   0.915254 0.900000  0.907563
          KNN (k=5)     0.904   0.913793 0.883333  0.898305

Best Performing Model: Gradient Boosting
