## Bert Training

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, BayesianRidge, LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

# Function to load data
def load_data(url):
    """Read a CSV and separate it into a feature matrix and a target vector.

    Args:
        url: Anything ``pandas.read_csv`` accepts (path, URL, file-like).

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, where ``X`` holds every column
        except ``'output'`` and ``y`` holds the ``'output'`` column.

    Raises:
        KeyError: If the CSV has no ``'output'`` column.
    """
    frame = pd.read_csv(url)
    feature_frame = frame.drop(columns=['output'])
    return feature_frame.values, frame['output'].values

# Function to define models and hyperparameter grids
def define_models():
    """Build the candidate estimators and their hyperparameter grids.

    Returns:
        tuple: ``(models, param_grids)``. Both are dicts keyed by the same
        display names; ``models`` holds unfitted estimators and
        ``param_grids`` holds the grid searched for each one.
    """
    # PolynomialFeatures is used below but is not among the imports visible
    # for this cell, so import it locally to avoid a NameError on a fresh run.
    from sklearn.preprocessing import PolynomialFeatures

    models = {
        "Linear Regression": LinearRegression(),
        "Polynomial Regression": Pipeline([
            ('scaler', StandardScaler()),
            ('poly', PolynomialFeatures(degree=2)),
            ('linear', LinearRegression()),
        ]),
        "Lasso Regression": Lasso(),
        # NOTE(review): LogisticRegression is a classifier sitting among
        # regressors - confirm the targets are discrete labels.
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Bayesian Linear Regression": BayesianRidge(),
        "Support Vector Regression": SVR(),
        "Decision Tree Regression": DecisionTreeRegressor(),
        "Gaussian Process Regression": GaussianProcessRegressor(),
        "Random Forest Regression": RandomForestRegressor(),
        "KNN Regression": KNeighborsRegressor()
    }

    param_grids = {
        "Linear Regression": {},
        # Pipeline parameters are addressed as '<step name>__<parameter>'.
        # NOTE(review): the recorded run reports 20 of 30 fits failing for
        # this grid; the cause is not visible here - confirm before trimming.
        "Polynomial Regression": {'poly__degree': [2, 3, 4]},
        "Lasso Regression": {'alpha': [0.01, 0.1, 1, 10]},
        "Logistic Regression": {'C': [0.1, 1, 10], 'solver': ['lbfgs', 'liblinear']},
        "Bayesian Linear Regression": {'alpha_1': [1e-6, 1e-5, 1e-4], 'alpha_2': [1e-6, 1e-5, 1e-4]},
        "Support Vector Regression": {'C': [0.1, 1, 10], 'epsilon': [0.01, 0.1, 0.5]},
        "Decision Tree Regression": {'max_depth': [None, 10, 20, 30], 'min_samples_split': [2, 5, 10]},
        "Gaussian Process Regression": {'alpha': [1e-2, 1e-3, 1e-4]},
        "Random Forest Regression": {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]},
        "KNN Regression": {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance']}
    }

    return models, param_grids

# Function to grid-search every model and score its best estimator
def perform_grid_search(models, param_grids, X, y, threshold=0.1):
    """Tune each model with 10-fold ``GridSearchCV`` and score the winner.

    Args:
        models: Mapping of display name -> unfitted estimator.
        param_grids: Mapping of display name -> parameter grid. A model
            without an entry is searched with an empty grid.
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Target values.
        threshold: Relative tolerance for a prediction to count as correct.
            The default of 0.1 means within +/-10% of the true value.

    Returns:
        list[dict]: One dict per model with the keys ``'Model'``,
        ``'Best Params'`` and ``'Accuracy'`` (percentage of predictions that
        fall inside the tolerance).
    """
    y_true = np.asarray(y)
    results = []
    for model_name, model in models.items():
        print(f"Running GridSearchCV for {model_name}...")
        grid_search = GridSearchCV(
            model,
            param_grids.get(model_name, {}),
            cv=10,
            n_jobs=-1,
            scoring='neg_mean_squared_error',
        )
        grid_search.fit(X, y)

        # NOTE(review): the best estimator is scored on the same X it was
        # fitted on, so this accuracy is in-sample and likely optimistic.
        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X)

        # Correct means |error| <= threshold * |true value|. For a true value
        # of 0 the tolerance collapses to 0, so only an exact 0 counts.
        correct_predictions = np.abs(y_pred - y_true) <= threshold * np.abs(y_true)
        accuracy = np.mean(correct_predictions) * 100  # Percentage of correct predictions

        results.append({
            'Model': model_name,
            'Best Params': grid_search.best_params_,
            'Accuracy': accuracy
        })
    return results

# Function to display results
def display_results(results):
    """Print the per-model results table, highest accuracy first.

    Args:
        results: List of per-model dicts, each carrying an ``'Accuracy'``
            entry. An empty list prints an empty table instead of raising.
    """
    results_df = pd.DataFrame(results)
    if results_df.empty:
        # Without rows there is no 'Accuracy' column, so sort_values would
        # raise KeyError; show the empty frame and stop.
        print(results_df)
        return
    print(results_df.sort_values(by="Accuracy", ascending=False))

# Main function to run the entire process
def main(url):
    """Load the dataset at ``url``, grid-search every model and print the table."""
    features, targets = load_data(url)
    estimators, grids = define_models()
    display_results(perform_grid_search(estimators, grids, features, targets))


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    url = 'bert_embeddings_input_output_custom_columns.csv'  # replace with your actual file URL
    main(url)


Running GridSearchCV for Linear Regression...
Running GridSearchCV for Polynomial Regression...


20 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\pipeline.py", line 471, in fit
    Xt = self._fit(X, y, routed_params)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Jyoshitha\anaconda\Lib\site-pa

Running GridSearchCV for Lasso Regression...
Running GridSearchCV for Logistic Regression...
Running GridSearchCV for Bayesian Linear Regression...
Running GridSearchCV for Support Vector Regression...
Running GridSearchCV for Decision Tree Regression...
Running GridSearchCV for Gaussian Process Regression...
Running GridSearchCV for Random Forest Regression...
Running GridSearchCV for KNN Regression...
                         Model  \
3          Logistic Regression   
9               KNN Regression   
6     Decision Tree Regression   
0            Linear Regression   
7  Gaussian Process Regression   
1        Polynomial Regression   
8     Random Forest Regression   
4   Bayesian Linear Regression   
5    Support Vector Regression   
2             Lasso Regression   

                                         Best Params   Accuracy  
3                    {'C': 1, 'solver': 'liblinear'}  97.819315  
9          {'n_neighbors': 7, 'weights': 'distance'}  84.112150  
6       {'max_depth'

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV

# Function to load data
def load_data(url):
    """Read a CSV and separate it into a feature matrix and a target vector.

    Args:
        url: Anything ``pandas.read_csv`` accepts (path, URL, file-like).

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, where ``X`` holds every column
        except ``'output'`` and ``y`` holds the ``'output'`` column.

    Raises:
        KeyError: If the CSV has no ``'output'`` column.
    """
    frame = pd.read_csv(url)
    feature_frame = frame.drop(columns=['output'])
    return feature_frame.values, frame['output'].values

# Function to define models and hyperparameter grids
def define_models():
    """Build the candidate estimators and their hyperparameter grids.

    Returns:
        tuple: ``(models, param_grids)``. Both are dicts keyed by the same
        display names; ``models`` holds unfitted estimators and
        ``param_grids`` holds the grid searched for each one.
    """
    # PolynomialFeatures is used below but is not imported by this cell or
    # by any cell before it, so import it locally to avoid a NameError.
    from sklearn.preprocessing import PolynomialFeatures

    models = {
        "Linear Regression": LinearRegression(),
        "Polynomial Regression": Pipeline([
            ('scaler', StandardScaler()),
            ('poly', PolynomialFeatures(degree=2)),
            ('linear', LinearRegression()),
        ]),
        "Lasso Regression": Lasso(),
        # NOTE(review): LogisticRegression is a classifier sitting among
        # regressors - confirm the targets are discrete labels.
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Bayesian Linear Regression": BayesianRidge(),
        "Support Vector Regression": SVR(),
        "Decision Tree Regression": DecisionTreeRegressor(),
        "Gaussian Process Regression": GaussianProcessRegressor(),
        "Random Forest Regression": RandomForestRegressor(),
        "KNN Regression": KNeighborsRegressor()
    }

    param_grids = {
        "Linear Regression": {},
        # Pipeline parameters are addressed as '<step name>__<parameter>'.
        # NOTE(review): the recorded run reports 20 of 30 fits failing for
        # this grid; the cause is not visible here - confirm before trimming.
        "Polynomial Regression": {'poly__degree': [2, 3, 4]},
        "Lasso Regression": {'alpha': [0.01, 0.1, 1, 10]},
        "Logistic Regression": {'C': [0.1, 1, 10], 'solver': ['lbfgs', 'liblinear']},
        "Bayesian Linear Regression": {'alpha_1': [1e-6, 1e-5, 1e-4], 'alpha_2': [1e-6, 1e-5, 1e-4]},
        "Support Vector Regression": {'C': [0.1, 1, 10], 'epsilon': [0.01, 0.1, 0.5]},
        "Decision Tree Regression": {'max_depth': [None, 10, 20, 30], 'min_samples_split': [2, 5, 10]},
        "Gaussian Process Regression": {'alpha': [1e-2, 1e-3, 1e-4]},
        "Random Forest Regression": {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]},
        "KNN Regression": {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance']}
    }

    return models, param_grids

# Function to evaluate models with additional metrics (RMSE, R^2, MAE, MAPE)
def evaluate_models(models, param_grids, X, y, threshold=0.1):
    """Tune each model with 10-fold ``GridSearchCV`` and report error metrics.

    Args:
        models: Mapping of display name -> unfitted estimator.
        param_grids: Mapping of display name -> parameter grid. A model
            without an entry is searched with an empty grid.
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Target values.
        threshold: Relative tolerance for a prediction to count as correct
            (0.1 means within +/-10% of the true value).

    Returns:
        list[dict]: One dict per model with the keys ``'Model'``,
        ``'Best Params'``, ``'Accuracy'``, ``'RMSE'``, ``'R^2'``, ``'MAE'``
        and ``'MAPE'``. ``'MAPE'`` is averaged over non-zero targets only and
        is NaN when every target is zero.
    """
    y_true = np.asarray(y)
    # A percentage error is undefined where the true value is 0; dividing
    # anyway yields inf/nan and poisons the mean, so mask those rows out.
    nonzero = y_true != 0

    additional_results = []
    for model_name, model in models.items():
        print(f"Evaluating {model_name}...")

        # Apply GridSearchCV with 10-fold cross-validation
        grid_search = GridSearchCV(
            model,
            param_grids.get(model_name, {}),
            cv=10,
            n_jobs=-1,
            scoring='neg_mean_squared_error',
        )
        grid_search.fit(X, y)

        # NOTE(review): the best estimator is scored on the same X it was
        # fitted on, so every metric below is in-sample.
        best_model = grid_search.best_estimator_
        y_pred = np.asarray(best_model.predict(X))

        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)

        # Calculate MAPE (Mean Absolute Percentage Error) on non-zero targets
        if nonzero.any():
            relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
            mape = np.mean(np.abs(relative_error)) * 100
        else:
            mape = np.nan

        # Correct means |error| <= threshold * |true value|. For a true value
        # of 0 only an exact 0 prediction counts.
        correct_predictions = np.abs(y_pred - y_true) <= threshold * np.abs(y_true)
        accuracy = np.mean(correct_predictions) * 100  # Percentage of correct predictions

        # Store the results
        additional_results.append({
            'Model': model_name,
            'Best Params': grid_search.best_params_,
            'Accuracy': accuracy,
            'RMSE': rmse,
            'R^2': r2,
            'MAE': mae,
            'MAPE': mape
        })

    return additional_results

# Function to display results sorted by accuracy and print summary
def display_results_and_summary(results):
    """Print the per-model table (best accuracy first) and metric summaries.

    The summary reports the mean and population standard deviation
    (``np.std``) of RMSE, R^2, MAE and MAPE across all models.

    Args:
        results: List of per-model dicts carrying ``'Accuracy'``, ``'RMSE'``,
            ``'R^2'``, ``'MAE'`` and ``'MAPE'``. An empty list prints an
            empty table and skips the summary instead of raising.
    """
    additional_results_df = pd.DataFrame(results)
    if additional_results_df.empty:
        # Nothing to sort or average: there is no 'Accuracy' column to sort
        # by, and the mean of an empty list is NaN with a warning.
        print(additional_results_df)
        return

    # Calculate the mean and standard deviation for each metric
    metrics_summary = {}
    for metric in ('RMSE', 'R^2', 'MAE', 'MAPE'):
        values = [result[metric] for result in results]
        metrics_summary[f'Mean {metric}'] = np.mean(values)
        metrics_summary[f'Std Dev {metric}'] = np.std(values)

    # Print individual model results sorted by accuracy
    print(additional_results_df.sort_values(by="Accuracy", ascending=False))

    # Print summary of metrics
    print("\nSummary of Metrics:")
    for metric, value in metrics_summary.items():
        print(f"{metric}: {value}")

# Main function to run the entire process
def main(url):
    """Load the dataset at ``url``, evaluate every model and print the report."""
    features, targets = load_data(url)
    estimators, grids = define_models()
    display_results_and_summary(evaluate_models(estimators, grids, features, targets))


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    url = 'bert_embeddings_input_output_custom_columns.csv'  # replace with your actual file URL
    main(url)


Evaluating Linear Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Polynomial Regression...


20 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Jyoshitha\anaconda\Lib\site-packages\sklearn\pipeline.py", line 471, in fit
    Xt = self._fit(X, y, routed_params)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Jyoshitha\anaconda\Lib\site-pa

Evaluating Lasso Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Logistic Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100
  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Bayesian Linear Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Support Vector Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Decision Tree Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100
  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Gaussian Process Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating Random Forest Regression...


  mape = np.mean(np.abs((y - y_pred) / y)) * 100


Evaluating KNN Regression...
                         Model  \
3          Logistic Regression   
9               KNN Regression   
0            Linear Regression   
6     Decision Tree Regression   
7  Gaussian Process Regression   
1        Polynomial Regression   
8     Random Forest Regression   
4   Bayesian Linear Regression   
5    Support Vector Regression   
2             Lasso Regression   

                                         Best Params   Accuracy      RMSE  \
3                    {'C': 1, 'solver': 'liblinear'}  97.819315  0.255774   
9          {'n_neighbors': 7, 'weights': 'distance'}  84.112150  0.096674   
0                                                 {}  63.239875  0.096674   
6         {'max_depth': 10, 'min_samples_split': 10}  62.928349  0.473911   
7                                  {'alpha': 0.0001}  62.928349  0.096674   
1                                {'poly__degree': 2}  53.271028  0.456946   
8  {'max_depth': 20, 'min_samples_split': 5, 'n_e...  28.

  mape = np.mean(np.abs((y - y_pred) / y)) * 100
  mape = np.mean(np.abs((y - y_pred) / y)) * 100


## Bert Testing Q1

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression, BayesianRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Function to load and split the data
def load_and_split_data(url, test_size=0.2, random_state=42):
    """Read a CSV and split it into train and test partitions.

    The last column is used as the target and every other column as a
    feature.

    Args:
        url: Anything ``pandas.read_csv`` accepts.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    frame = pd.read_csv(url)
    features, target = frame.iloc[:, :-1], frame.iloc[:, -1]
    return tuple(
        train_test_split(features, target, test_size=test_size, random_state=random_state)
    )

# Function to define models
def define_models():
    """Return the candidate estimators keyed by display name.

    Returns:
        dict: Display name -> unfitted estimator with default settings,
        except where noted below.
    """
    models = {
        "Linear Regression": LinearRegression(),
        "Polynomial Regression (Degree 2)": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
        "Lasso Regression": Lasso(),
        # The default iteration cap was hit in the recorded run ("TOTAL NO. of
        # ITERATIONS REACHED LIMIT"); use the same max_iter=1000 as the
        # training cells so the solver can converge.
        "Logistic Regression (used for binary targets)": LogisticRegression(max_iter=1000),
        "Bayesian Linear Regression": BayesianRidge(),
        "Support Vector Regression": SVR(),
        "Decision Tree Regression": DecisionTreeRegressor(),
        "Gaussian Process Regression": GaussianProcessRegressor(),
        "Random Forest Regression": RandomForestRegressor(),
        "KNN Regression": KNeighborsRegressor()
    }
    return models

# Function to train and evaluate models
def train_and_evaluate_models(models, X_train, X_test, y_train, y_test):
    """Fit every model and print its first five predictions next to the truth.

    A failure in one model is reported and does not stop the remaining ones.

    Args:
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets; must support ``[:5].values`` (e.g. a pandas
            Series).
    """
    separator = "-" * 40
    for label, estimator in models.items():
        try:
            estimator.fit(X_train, y_train)
            predictions = estimator.predict(X_test)

            # Only a five-row sample is shown, not a full evaluation.
            print(f"Model: {label}")
            print(f"Predicted Marks: {predictions[:5]}")
            print(f"Actual Marks: {y_test[:5].values}")
        except Exception as exc:
            # Best-effort run: report the problem and move on to the next model.
            print(f"Model: {label} - Error: {exc!s}")
        print(separator)

# Main function to run the entire process
def main(url):
    """Load and split the data at ``url``, then fit and report every model."""
    train_X, test_X, train_y, test_y = load_and_split_data(url)
    train_and_evaluate_models(define_models(), train_X, test_X, train_y, test_y)


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    url = "Q1_bert.csv"  # replace with your actual file URL
    main(url)


Model: Linear Regression
Predicted Marks: [1.63950232 3.91859908 2.29691113 2.01460243 3.86849459]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Polynomial Regression (Degree 2)
Predicted Marks: [1.31369907 4.23004346 2.07053043 1.20378361 3.88707994]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Lasso Regression
Predicted Marks: [2.20434655 2.51420874 2.45811418 2.33772177 2.50542081]
Actual Marks: [0 4 2 1 5]
----------------------------------------


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: Logistic Regression (used for binary targets)
Predicted Marks: [0 5 1 0 5]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Bayesian Linear Regression
Predicted Marks: [1.19738859 3.9980072  2.16936771 1.73858151 4.37723246]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Support Vector Regression
Predicted Marks: [0.6227684  3.62052061 2.36500661 0.68830648 3.45888562]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Decision Tree Regression
Predicted Marks: [0. 5. 0. 0. 5.]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Gaussian Process Regression
Predicted Marks: [2.17721570e-16 6.62087253e-01 6.19336461e-02 5.41587320e-08
 2.05435023e-01]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: Random Forest Regression
Predicted Marks: [1.3  3.28 1.78 0.4  4.26]
Actual Marks: [0 4 2 1 5]
----------------------------------------
Model: KNN Regression
Predict

## Bert Testing Q2

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression, BayesianRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Function to load and preprocess the data
def load_and_preprocess_data(url):
    """Read a CSV, drop rows without a target, and split features from target.

    Args:
        url: Anything ``pandas.read_csv`` accepts.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a DataFrame of every column but the
        last and ``y`` is the last column as a Series. Rows whose last column
        is NaN are removed; the original row index is kept.
    """
    frame = pd.read_csv(url)

    # The target is whichever column comes last; rows lacking it are unusable.
    target_column = frame.columns[-1]
    usable = frame.dropna(subset=[target_column])

    return usable.iloc[:, :-1], usable.iloc[:, -1]

# Function to split the data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42):
    """Split features and target into train and test partitions.

    Args:
        X: Features.
        y: Target.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    return tuple(
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )

# Function to define the models
def define_models():
    """Return the candidate estimators keyed by display name.

    Returns:
        dict: Display name -> unfitted estimator with default settings,
        except where noted below.
    """
    models = {
        "Linear Regression": LinearRegression(),
        "Polynomial Regression (Degree 2)": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
        "Lasso Regression": Lasso(),
        # Use the same max_iter=1000 as the training cells so the solver is
        # not stopped by the lower default iteration cap.
        "Logistic Regression (used for binary targets)": LogisticRegression(max_iter=1000),
        "Bayesian Linear Regression": BayesianRidge(),
        "Support Vector Regression": SVR(),
        "Decision Tree Regression": DecisionTreeRegressor(),
        "Gaussian Process Regression": GaussianProcessRegressor(),
        "Random Forest Regression": RandomForestRegressor(),
        "KNN Regression": KNeighborsRegressor()
    }
    return models

# Function to train and evaluate the models
def train_and_evaluate_models(models, X_train, X_test, y_train, y_test):
    """Fit every model and print its first five predictions next to the truth.

    A failure in one model is reported and does not stop the remaining ones.

    Args:
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets; must support ``[:5].values`` (e.g. a pandas
            Series).
    """
    separator = "-" * 40
    for label, estimator in models.items():
        try:
            estimator.fit(X_train, y_train)
            predictions = estimator.predict(X_test)

            # Only a five-row sample is shown, not a full evaluation.
            print(f"Model: {label}")
            print(f"Predicted Marks: {predictions[:5]}")
            print(f"Actual Marks: {y_test[:5].values}")
        except Exception as exc:
            # Best-effort run: report the problem and move on to the next model.
            print(f"Model: {label} - Error: {exc!s}")
        print(separator)

# Main function to execute the whole process
def main(url):
    """Load the data at ``url``, split it, then fit and report every model."""
    features, target = load_and_preprocess_data(url)
    train_X, test_X, train_y, test_y = split_data(features, target)
    train_and_evaluate_models(define_models(), train_X, test_X, train_y, test_y)


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    url = "Q2_bert.csv"  # replace with your actual file URL
    main(url)


Model: Linear Regression
Predicted Marks: [2.55356843 1.54077868 2.55356843 1.7254718  5.        ]
Actual Marks: [4. 0. 4. 0. 5.]
----------------------------------------
Model: Polynomial Regression (Degree 2)
Predicted Marks: [2.25677647 1.36314363 2.25677647 1.70913599 5.        ]
Actual Marks: [4. 0. 4. 0. 5.]
----------------------------------------
Model: Lasso Regression
Predicted Marks: [2.38095238 2.38095238 2.38095238 2.38095238 2.38095238]
Actual Marks: [4. 0. 4. 0. 5.]
----------------------------------------
Model: Logistic Regression (used for binary targets)
Predicted Marks: [5. 0. 5. 2. 5.]
Actual Marks: [4. 0. 4. 0. 5.]
----------------------------------------
Model: Bayesian Linear Regression
Predicted Marks: [2.54233147 1.53974658 2.54233147 1.77237055 4.99999994]
Actual Marks: [4. 0. 4. 0. 5.]
----------------------------------------
Model: Support Vector Regression
Predicted Marks: [2.67813639 1.44656791 2.67813639 1.63145921 4.90017929]
Actual Marks: [4. 0. 4. 0. 

## Bert Testing Q3

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression, BayesianRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Function to load and preprocess the data
def load_and_preprocess_data(url):
    """Read a CSV, drop rows without a target, and split features from target.

    Args:
        url: Anything ``pandas.read_csv`` accepts.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a DataFrame of every column but the
        last and ``y`` is the last column as a Series. Rows whose last column
        is NaN are removed; the original row index is kept.
    """
    frame = pd.read_csv(url)

    # The target is whichever column comes last; rows lacking it are unusable.
    target_column = frame.columns[-1]
    usable = frame.dropna(subset=[target_column])

    return usable.iloc[:, :-1], usable.iloc[:, -1]

# Function to split the data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42):
    """Split features and target into train and test partitions.

    Args:
        X: Features.
        y: Target.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    return tuple(
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )

# Function to define the models
def define_models():
    """Return the candidate estimators keyed by display name.

    Returns:
        dict: Display name -> unfitted estimator with default settings,
        except where noted below.
    """
    models = {
        "Linear Regression": LinearRegression(),
        "Polynomial Regression (Degree 2)": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
        "Lasso Regression": Lasso(),
        # Use the same max_iter=1000 as the training cells so the solver is
        # not stopped by the lower default iteration cap.
        "Logistic Regression (used for binary targets)": LogisticRegression(max_iter=1000),
        "Bayesian Linear Regression": BayesianRidge(),
        "Support Vector Regression": SVR(),
        "Decision Tree Regression": DecisionTreeRegressor(),
        "Gaussian Process Regression": GaussianProcessRegressor(),
        "Random Forest Regression": RandomForestRegressor(),
        "KNN Regression": KNeighborsRegressor()
    }
    return models

# Function to train and evaluate the models
def train_and_evaluate_models(models, X_train, X_test, y_train, y_test):
    """Fit every model and print its first five predictions as whole marks.

    Predictions are rounded to the nearest integer before printing. A failure
    in one model is reported and does not stop the remaining ones.

    Args:
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``; ``predict`` must return a NumPy array.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets as a pandas Series.
    """
    for model_name, model in models.items():
        try:
            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)

            # Round to the nearest mark before casting: a bare astype(int)
            # truncates toward zero, so 3.9 would be reported as 3.
            y_pred_int = y_pred.round().astype(int)
            # NOTE(review): targets are still truncated by astype(int); this
            # assumes they are whole numbers - confirm.
            y_test_int = y_test.astype(int)

            # Print results; tolist() yields plain Python ints so the output
            # does not depend on how NumPy renders its scalar types.
            print(f"Model: {model_name}")
            print(f"Predicted Marks: {y_pred_int[:5].tolist()}")  # Print first 5 predictions as integers
            print(f"Actual Marks: {y_test_int[:5].values.tolist()}")  # Print first 5 actual values as integers
            print("-" * 40)
        except Exception as e:
            # Best-effort run: report the problem and move on to the next model.
            print(f"Model: {model_name} - Error: {str(e)}")
            print("-" * 40)

# Main function to execute the whole process
def main(url):
    """Load the data at ``url``, split it, then fit and report every model."""
    features, target = load_and_preprocess_data(url)
    train_X, test_X, train_y, test_y = split_data(features, target)
    train_and_evaluate_models(define_models(), train_X, test_X, train_y, test_y)


# Script entry point: only runs when executed directly, not on import.
if __name__ == "__main__":
    url = "Q3_bert.csv"  # replace with your actual file URL
    main(url)


Model: Linear Regression
Predicted Marks: [1, 3, 0, 4, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Polynomial Regression (Degree 2)
Predicted Marks: [1, 3, 0, 4, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Lasso Regression
Predicted Marks: [2, 2, 2, 2, 2]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Logistic Regression (used for binary targets)
Predicted Marks: [1, 4, 1, 4, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Bayesian Linear Regression
Predicted Marks: [1, 3, 0, 3, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Support Vector Regression
Predicted Marks: [1, 2, 1, 2, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Decision Tree Regression
Predicted Marks: [5, 1, 1, 4, 1]
Actual Marks: [1, 1, 1, 5, 0]
----------------------------------------
Model: Gaussian Process Regress