In [None]:
!pip install flask==2.0.1
!pip install gunicorn==20.1.0
!pip install scikit-learn==0.24.1
!pip install mlflow==1.17.0
!pip install pytest
!pip install werkzeug==2.2.2
!pip install flask-ngrok


In [None]:
import numpy as np
import pandas as pd
# import data_clean_utils
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer, KNNImputer, MissingIndicator
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder, MinMaxScaler, PowerTransformer, OrdinalEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Load the wine-quality table from a hard-coded absolute path.
# NOTE(review): "/content/..." looks like a hosted-notebook working directory —
# confirm, and adjust the path when running this notebook anywhere else.
wine_data = pd.read_csv("/content/wine_quality.csv")

In [None]:
# Report the overall dimensions of the loaded table.
rows = wine_data.shape[0]
cols = wine_data.shape[1]
summary_template = "The dataset has {} rows and {} columns"
print(summary_template.format(rows, cols))

In [None]:
# Per-column dtypes, displayed as a Series named "Data Types".
wine_data.dtypes.rename("Data Types")

In [None]:
# Number of missing entries in each column.
wine_data.isna().sum()

In [None]:
# Pairwise correlation matrix of the columns, using pandas' default settings.
# NOTE(review): on pandas >= 2.0 this raises if any column is non-numeric —
# confirm every column is numeric, or pass numeric_only=True.
wine_data.corr()

In [None]:
# Total number of cells holding the literal string "NaN " (note the trailing space)

wine_data.eq("NaN ").sum().sum()

In [None]:
# Same check, broken down per column: cells equal to the literal string "NaN ".
wine_data.eq("NaN ").sum()

In [None]:
# Independent copy of the loaded frame; the feature/target split is taken from this copy.
wine_data_copy = wine_data.copy()

In [None]:
# Separate the target column ('quality') from the predictor columns.
y = wine_data_copy['quality']
X = wine_data_copy.drop(columns=['quality'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Print the (rows, columns) shape of each predictor split.
print("The size of train data is",X_train.shape)
print("The shape of test data is",X_test.shape)

In [None]:
# Min-max scaler with default arguments; it is not fitted at this point.
scaler = MinMaxScaler()

In [None]:
# Learn the scaling from the training predictors only, then apply it to both splits.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Re-wrap the scaled arrays as DataFrames that keep the original column labels and row index.
X_train_scaled_df = pd.DataFrame(
    data=X_train_scaled,
    index=X_train.index,
    columns=X_train.columns,
)
X_test_scaled_df = pd.DataFrame(
    data=X_test_scaled,
    index=X_test.index,
    columns=X_test.columns,
)

In [None]:
!pip install optuna

In [None]:
import optuna
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
import numpy as np

In [None]:


# Objective function for the hyperparameter search
def objective(trial):
    """Score one sampled random-forest configuration.

    Draws four integer hyperparameters from ``trial``, builds a
    ``RandomForestRegressor`` with them (fixed ``random_state=42``) and
    evaluates it with 3-fold cross-validation on the notebook-level
    ``X_train`` / ``y_train``.

    Parameters
    ----------
    trial
        Object providing ``suggest_int(name, low, high)``.

    Returns
    -------
    The mean of the per-fold R^2 scores. Larger values are better, so the
    study that calls this function has to be configured to maximize.
    """
    # Sample the search space; the order of the suggest_int calls is kept stable.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
    }

    candidate = RandomForestRegressor(random_state=42, **sampled)

    # One R^2 value per fold (3 folds), averaged into a single score.
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=3, scoring='r2')
    return fold_scores.mean()

# Run the study.
# The objective returns a mean R^2 (higher is better), hence direction="maximize".
# The TPE sampler is seeded so that re-running the notebook proposes the same
# sequence of trials and therefore arrives at the same best parameters.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)


In [None]:
# Report the best trial: its sampled hyperparameters and its objective value
# (the objective returns a mean cross-validated R^2).
print("Best hyperparameters:", study.best_params)
print("Best R2:", study.best_value)

In [None]:
# Use the best hyperparameters to train the final model
best_params = study.best_params

# The estimator has to be fitted before it can be saved or asked for predictions;
# constructing it alone leaves it untrained. It is fitted on the same unscaled
# training split that the hyperparameter search cross-validated on.
# fit() returns the estimator itself, so the chained call keeps the cell output-free.
best_model = RandomForestRegressor(
    n_estimators=best_params['n_estimators'],
    max_depth=best_params['max_depth'],
    min_samples_split=best_params['min_samples_split'],
    min_samples_leaf=best_params['min_samples_leaf'],
    random_state=42
).fit(X_train, y_train)


In [None]:
# Display the estimator (its repr shows the chosen hyperparameters).
best_model

In [None]:
import joblib  # not used in this cell; kept in case other cells rely on the name
import pickle

# Save the model
# The file is written exactly once, with pickle, so its on-disk format is unambiguous.
# NOTE: only unpickle this file from a trusted location — loading a pickle executes code.
with open('best_rf_model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
from flask_ngrok import run_with_ngrok
from flask import Flask, request, jsonify

In [None]:
from flask import Flask, request, jsonify, render_template
# Flask application object; the route handlers in this cell are registered on it.
app = Flask(__name__)

# Hand the app to flask-ngrok's helper before app.run() is called.
# NOTE(review): presumably this makes app.run() also open an ngrok tunnel to the
# local server — confirm against the flask-ngrok documentation.
run_with_ngrok(app)


@app.route('/')
def home():
    """Serve the landing page by rendering the 'index.html' template."""
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    """Predict wine quality from the submitted form and re-render the page.

    Every value in the POSTed form is parsed as a float and the values, in form
    order, are passed as a single sample to the notebook-level ``best_model``.

    Returns
    -------
    The rendered 'index.html' template with ``prediction_text`` set. If any
    form value is not a valid number, the template is rendered with an error
    message and HTTP status 400 instead.
    """
    # Extract data from form. The features are parsed as floats: int() would
    # reject or truncate decimal inputs such as "7.4".
    # NOTE(review): assumes the form fields arrive in the same order as the
    # training columns — confirm against index.html.
    try:
        features = [float(x) for x in request.form.values()]
    except ValueError:
        return render_template(
            'index.html',
            prediction_text='Invalid input: every feature must be a number.'
        ), 400
    final_features = [np.array(features)]

    # Make prediction with the notebook's final model (no global named
    # `model` exists; the estimator is bound to `best_model`).
    prediction = best_model.predict(final_features)

    return render_template('index.html', prediction_text='Prediction: {}'.format(prediction))

if __name__ == "__main__":
    # Start Flask's built-in server; this call blocks the cell until it is interrupted.
    # NOTE(review): the captured output of this cell shows a ConnectionRefusedError
    # raised in a background thread right after startup — presumably the ngrok helper
    # failing to reach its local endpoint; confirm ngrok is installed and configured.
    # app.config['NGROK_PORT'] = 8080
    app.run()


 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Exception in thread Thread-10:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/usr/local/lib/python3.11/dist-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connectionpool.py", line 493, in _make_reque

In [None]:
# %tb

In [None]:
# rf.fit(X_train_scaled_df, y_train)

# # Predict on both training and test data
# y_train_pred = rf.predict(X_train_scaled_df)
# y_test_pred = rf.predict(X_test_scaled_df)

In [None]:
# from sklearn.metrics import mean_squared_error, r2_score

# # Assuming y_train_pred and y_test_pred are your predictions from the model
# train_r2 = r2_score(y_train, y_train_pred)
# test_r2 = r2_score(y_test, y_test_pred)

# # Calculate Mean Squared Error (MSE)
# train_mse = mean_squared_error(y_train, y_train_pred)
# test_mse = mean_squared_error(y_test, y_test_pred)

# # Print the results
# print(f"Training R^2: {train_r2}")
# print(f"Test R^2: {test_r2}")
# print(f"Training MSE: {train_mse}")
# print(f"Test MSE: {test_mse}")


In [None]:
# train_r2

In [None]:
# test_r2