In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=';')

In [3]:
X = df.drop(columns='quality')
y = df['quality']

In [4]:
model = LinearRegression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)

In [5]:
y_pred = model.predict(X_test)

In [7]:
print(f"RMSE: {mean_squared_error(y_test, y_pred):.2f}")
print(f"R² Score: {r2_score(y_test, y_pred):.2f}")

RMSE: 0.39
R² Score: 0.40


In [8]:
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
fig = px.scatter(results, x='Actual', y='Predicted', title="Linear Regression: Actual vs Predicted")
fig.add_shape(type='line', x0=0, y0=0, x1=10, y1=10, line=dict(color='red', dash='dash'))
fig.show()