<a href="https://colab.research.google.com/github/AroraAnshul301/Api-Call-Dataset/blob/main/Assignment_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
pip install pycaret



In [28]:
import pandas as pd

# Location of the raw API call log inside the Colab runtime.
file_path = '/content/API Call Dataset.csv'

# Read the whole log into memory; every later cell works from `data`.
data = pd.read_csv(filepath_or_buffer=file_path)

# Show the first five rows as a quick sanity check of the columns.
data.head(5)


Unnamed: 0,API Code,Time of Call
0,A1,01-02-2025 00:00
1,A3,01-02-2025 00:17
2,A3,01-02-2025 00:19
3,A8,01-02-2025 00:37
4,A8,01-02-2025 00:47


In [29]:
# Count calls per API code (value_counts sorts by count, descending) and
# keep the three most frequently called APIs.
api_call_counts = data['API Code'].value_counts()
top_apis = api_call_counts.iloc[:3]

top_apis


Unnamed: 0_level_0,count
API Code,Unnamed: 1_level_1
A9,2451
A2,2438
A7,2410


In [30]:
# Map each top API code to the CSV file that will hold only that API's rows.
top_api_files = {api: f'/content/{api}.csv' for api in top_apis.index}

# Write one file per API; the frame's index is not written.
for api, file_name in top_api_files.items():
    is_this_api = data['API Code'].eq(api)
    filtered_data = data.loc[is_this_api]
    filtered_data.to_csv(file_name, index=False)

top_api_files


{'A9': '/content/A9.csv', 'A2': '/content/A2.csv', 'A7': '/content/A7.csv'}

In [33]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

def build_and_evaluate_models(data, target_column='Time of Call', test_size=0.2, random_state=42):
    """Fit several regressors on (row index -> call time) and rank them by test MSE.

    The values in ``target_column`` are parsed as day-first date-times and
    converted to whole seconds since the Unix epoch; that number is the
    regression target. The single feature is each row's index value. The rows
    are split into train and test parts, every candidate model is fitted on
    the training part, and its mean squared error on the test part is stored.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``target_column``. It is not modified.
    target_column : str, default 'Time of Call'
        Name of the column holding the call date-times.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed used for the split and for the Random Forest and Decision Tree
        models.

    Returns
    -------
    tuple
        On success ``(best_model_name, results)``, where ``results`` maps each
        model name to its test MSE (in seconds squared) and ``best_model_name``
        is the key with the smallest MSE.
        On any failure ``(error_message, None)``: no exception is raised, so
        callers must check ``results is None`` before using the first element
        as a model name.
    """
    try:
        call_times = pd.to_datetime(data[target_column], dayfirst=True)
        if call_times.dt.tz is not None:
            # Express tz-aware values in UTC, then drop the tz so the epoch
            # subtraction below is between two naive timestamps.
            call_times = call_times.dt.tz_convert('UTC').dt.tz_localize(None)

        # Epoch seconds via timedelta arithmetic: unlike `.astype(int) // 10**9`
        # this does not depend on the platform's `int` width or on the datetime
        # column being stored with nanosecond resolution.
        y = (call_times - pd.Timestamp('1970-01-01')) // pd.Timedelta('1s')

        # scikit-learn expects a 2-D feature matrix: one column holding the index value.
        X = data.index.values.reshape(-1, 1)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # All candidates use library defaults and are fitted on raw, unscaled values.
        models = {
            'Linear Regression': LinearRegression(),
            'Random Forest': RandomForestRegressor(random_state=random_state),
            'Support Vector Regressor': SVR(),
            'Decision Tree': DecisionTreeRegressor(random_state=random_state),
            'K-Nearest Neighbors': KNeighborsRegressor()
        }

        results = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            results[model_name] = mean_squared_error(y_test, y_pred)

        best_model = min(results, key=results.get)
        return best_model, results

    except Exception as e:
        # Kept for backward compatibility: failures are reported through the
        # return value instead of being raised.
        return str(e), None

# Evaluate the models on every exported top-API file.
# `top_api_files` is the API -> path mapping created when the per-API CSVs were
# written, so the API codes and paths used here always match the files on disk
# (no hard-coded API names, no dependence on the current working directory).
best_models = {}
for api, file_path in top_api_files.items():
    api_data = pd.read_csv(file_path)
    best_model, results = build_and_evaluate_models(api_data, target_column='Time of Call')
    best_models[api] = {'Best Model': best_model, 'Results': results}

# Report the per-model test MSE for each API.
for api, info in best_models.items():
    print(f"API: {api}")
    if info['Results'] is None:
        # build_and_evaluate_models reports failure as (error message, None);
        # show the message instead of crashing on None.items().
        print(f"Evaluation failed: {info['Best Model']}")
        print()
        continue
    print(f"Best Model: {info['Best Model']}")
    print("Results:")
    for model, mse in info['Results'].items():
        print(f"  {model}: MSE = {mse}")
    print()


API: A9
Best Model: Random Forest
Results:
  Linear Regression: MSE = 3429628001.8734345
  Random Forest: MSE = 27919341.52959822
  Support Vector Regressor: MSE = 14487900999288.336
  Decision Tree: MSE = 55109276.57841141
  K-Nearest Neighbors: MSE = 39316202.98167006

API: A2
Best Model: Random Forest
Results:
  Linear Regression: MSE = 3148223568.021801
  Random Forest: MSE = 34531035.952555016
  Support Vector Regressor: MSE = 14532529689639.418
  Decision Tree: MSE = 68544250.81967214
  K-Nearest Neighbors: MSE = 35997032.95081967

API: A7
Best Model: Random Forest
Results:
  Linear Regression: MSE = 6496801610.398336
  Random Forest: MSE = 28606105.658335786
  Support Vector Regressor: MSE = 15440095737769.33
  Decision Tree: MSE = 55438192.53112033
  K-Nearest Neighbors: MSE = 38937471.53526971

