In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [15]:
# Providing path of file
file_path = '/WineQuality2.csv'

# Read the data from the CSV file and create a DataFrame
wine_data = pd.read_csv(file_path)


# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Iterate through each column and check if it contains categorical data
for col in wine_data.columns:
    if wine_data[col].dtype == 'object':
        wine_data[col] = label_encoder.fit_transform(wine_data[col])

# Removing extra features
columns_to_remove = ['price','fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide' , 'total sulfur dioxide', 'density']
wine_data.drop(columns=columns_to_remove, inplace=True)



# Step 1: Data Preprocessing (if necessary)
# Split the data into features (X) and target variable (y)
X = wine_data.drop('quality', axis=1)  # 'Price' is the column containing the house prices
y = wine_data['quality']

data_encoded = pd.get_dummies(wine_data, drop_first=True)



In [16]:
# Step 2: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Step 3: Build the Linear Regression Model

model = LinearRegression()
model.fit(X_train, y_train)



In [18]:
# Step 4: Model Evaluation
y_pred = model.predict(X_test)



In [19]:
# Evaluate the model using Mean Squared Error (MSE) and R-squared (coefficient of determination)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r_squared)


Mean Squared Error: 0.586114799477082
R-squared: 0.22387775022176448


In [20]:
# Step 6: Get the Model Coefficients and Intercept
# If you want to get the coefficients and intercept of the linear regression model:
coefficients = model.coef_
intercept = model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

Coefficients: [0.01013708 0.68401556 0.31571655 0.32360385]
Intercept: 1.5410227638056018


In [21]:
# Step 4: Get Input for House Features
print("Enter the house features:")
Type = int(input("Red Wine(1) or White Wine(2): "))
alcohol = float(input("amount of alcohol[RANGE 8-12]: "))
sulphates = float(input("amount of sulphates[RANGE 0-1]: "))
pH = float(input("pH[RANGE 0-5]: "))

Enter the house features:
Red Wine(1) or White Wine(2): 2
amount of alcohol[RANGE 8-12]: 8.36
amount of sulphates[RANGE 0-1]: 0.55
pH[RANGE 0-5]: 3.44


In [23]:
# Step 5: Make Prediction for the Input House Features
input_features = np.array([[Type, alcohol, sulphates, pH]])
predicted_quality = model.predict(input_features)

print("Predicted Quality:", predicted_quality[0])

Predicted Quality: 8.566508385662958


