In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the red and white wine-quality tables; both files use ';' as the field separator.
# NOTE(review): the /content/ prefix looks like a Colab upload location — confirm before running elsewhere.
red_csv_path = '/content/winequality-red.csv'
white_csv_path = '/content/winequality-white.csv'

Red = pd.read_csv(red_csv_path, sep=';')
White = pd.read_csv(white_csv_path, sep=';')

## **Merging the datasets**

In [None]:
# Tag each source frame with its wine colour so rows stay identifiable after merging.
Red['type'] = 'red'
White['type'] = 'white'

# Stack the white rows first, then the red rows. ignore_index=True gives the
# combined frame a fresh 0..n-1 index; without it each half keeps its own
# 0-based labels, so a label lookup such as wine.loc[0] would match two rows.
wine = pd.concat([White, Red], ignore_index=True)
wine.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,type
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6,white
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6,white
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6,white
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6,white
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6,white


## **Train Test Split**

In [None]:
import sklearn

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The split is stratified on y_res and
# uses a fixed seed so it is repeatable.
# NOTE(review): X_res / y_res are not defined in the cells shown here — confirm
# the cell that creates them runs before this one on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X_res,
    y_res,
    test_size=0.20,
    stratify=y_res,
    random_state=2,
)

In [None]:
# Show the shape of the full feature set followed by the train and test partitions.
print(*(part.shape for part in (X_res, X_train, X_test)))

(19250, 11) (15400, 11) (3850, 11)


## **Training the Model using Random Forest**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Build a 100-tree random forest with a fixed seed and fit it on the training
# partition. fit() returns the estimator itself, so `model` is the fitted forest.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
model

## **Model Evaluation**

***Training Data***

In [None]:
# Predictions of the fitted forest on the same rows it was trained on.
X_train_pred = model.predict(X=X_train)

In [None]:
from sklearn import metrics

In [None]:
# Error metrics on the training partition; RMSE is the square root of the MSE.
train_mae = metrics.mean_absolute_error(y_train, X_train_pred)
train_mse = metrics.mean_squared_error(y_train, X_train_pred)

print('MAE:', train_mae)
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

MAE: 0.08906493506493503
MSE: 0.0256098961038961
RMSE: 0.16003092233657876


In [None]:
# R^2 of the forest's predictions against the training targets.
metrics.r2_score(y_true=y_train, y_pred=X_train_pred)

0.993597525974026

***Testing Data***

In [None]:
# Predictions of the fitted forest on the held-out test partition.
X_test_pred = model.predict(X=X_test)

In [None]:
# Error metrics on the held-out test partition; RMSE is the square root of the MSE.
test_mae = metrics.mean_absolute_error(y_test, X_test_pred)
test_mse = metrics.mean_squared_error(y_test, X_test_pred)

print('MAE:', test_mae)
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))

MAE: 0.23336103896103894
MSE: 0.16942264935064935
RMSE: 0.411609826596316


In [None]:
# R^2 of the forest's predictions against the held-out test targets.
metrics.r2_score(y_true=y_test, y_pred=X_test_pred)

0.9576443376623377

In [None]:
# Score one hand-entered sample with the fitted forest. The values are listed in
# the same order as the column labels below.
feature_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide',
        'total sulfur dioxide','density','pH','sulphates','alcohol']
sample_values = [9.4, 0.7, 0, 1.9, 0.076, 11, 34, 0.9978, 3.51, 0.56, 9.4]

# A 1 x 11 float array labelled with the feature names.
df = pd.DataFrame(np.array([sample_values]), columns=feature_names)
new_data_predictions = model.predict(df)
print('Predicted values:', new_data_predictions)

Predicted values: [4.82]


In [None]:
# Score one hand-entered sample with the fitted forest. The values are listed in
# the same order as the column labels below.
feature_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide',
        'total sulfur dioxide','density','pH','sulphates','alcohol']
sample_values = [7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8]

# A 1 x 11 float array labelled with the feature names.
df = pd.DataFrame(np.array([sample_values]), columns=feature_names)
new_data_predictions = model.predict(df)
print('Predicted values:', new_data_predictions)

Predicted values: [5.47]


In [None]:
# Score one hand-entered sample with the fitted forest. The values are listed in
# the same order as the column labels below.
feature_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide',
        'total sulfur dioxide','density','pH','sulphates','alcohol']
sample_values = [6.3, 0.3, 0.34, 1.6, 0.049, 14, 132, 0.994, 3.3, 0.49, 9.5]

# A 1 x 11 float array labelled with the feature names.
df = pd.DataFrame(np.array([sample_values]), columns=feature_names)
new_data_predictions = model.predict(df)
print('Predicted values:', new_data_predictions)

Predicted values: [5.55]


In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression baseline fitted on the same training partition as the forest.
# The 'saga' solver shuffles the data while optimising, so random_state is pinned
# to make repeated runs produce the same fitted model (it was unseeded before).
# NOTE(review): as far as this cell shows, the features are passed unscaled;
# scikit-learn documents that saga's fast convergence is only guaranteed when
# features are on a similar scale — consider standardising before fitting.
model1 = LogisticRegression(max_iter=2000, solver='saga', random_state=42)
model1.fit(X_train, y_train)



In [None]:
# Class predictions of the logistic-regression model on the held-out test partition.
y_test_pred = model1.predict(X=X_test)


In [None]:
# Error metrics for the logistic-regression predictions on the test partition;
# RMSE is the square root of the MSE.
logreg_test_mae = metrics.mean_absolute_error(y_test, y_test_pred)
logreg_test_mse = metrics.mean_squared_error(y_test, y_test_pred)

print('MAE:', logreg_test_mae)
print('MSE:', logreg_test_mse)
print('RMSE:', np.sqrt(logreg_test_mse))

MAE: 1.1041558441558441
MSE: 2.9451948051948054
RMSE: 1.7161569873396796


In [None]:
# R^2 of the logistic-regression predictions against the held-out test targets.
metrics.r2_score(y_true=y_test, y_pred=y_test_pred)

0.26370129870129866

In [None]:
from google.colab import files
import joblib

# Single source of truth for the artifact name: the same variable is used for
# writing the file and for downloading it, so renaming it cannot desynchronise the two.
model_filename = "rf_model.pkl"

# Save the trained model to a file
joblib.dump(model, model_filename)

# Hand the file that was just written to the Colab download helper.
files.download(model_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>