In [None]:
import os

# AWS configuration for this session.
# Do not paste real keys into the notebook. setdefault() keeps any credential
# that is already exported to the kernel's environment and only falls back to
# the empty placeholder when the variable is missing, so the keys are always
# present in os.environ afterwards without clobbering real values.
os.environ.setdefault('AWS_ACCESS_KEY_ID', '')
os.environ.setdefault('AWS_SECRET_ACCESS_KEY', '')
os.environ['AWS_DEFAULT_REGION'] = 'us-east-2'  # Adjust to your bucket region

In [2]:
!pip install -r requirements.txt



In [4]:
import pandas as pd
import boto3
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib

# Fetch both raw CSV files from the S3 bucket into the working directory.
s3 = boto3.client('s3')
bucket_name = 'wine-quality-data-rcc'

for csv_name in ('winequality-red.csv', 'winequality-white.csv'):
    # The object key doubles as the local file name.
    s3.download_file(bucket_name, csv_name, csv_name)

# Read the two semicolon-delimited tables and stack them into a single frame
# with a fresh 0..n-1 index.
red_df, white_df = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ('winequality-red.csv', 'winequality-white.csv')
)
df = pd.concat([red_df, white_df], ignore_index=True)

# Separate target from features, hold out 20% of the rows for testing, and
# standardize using statistics fitted on the training rows only.
y = df['quality']
X = df.drop(columns='quality')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
# The test split reuses the training statistics; it is never re-fitted.
X_test_scaled = scaler.transform(X_test)

# Fit a linear regression on the standardized training features.
model = LinearRegression().fit(X_train_scaled, y_train)

# Score the held-out split and report both metrics to four decimals.
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Test MSE: {mse:.4f}')
print(f'Test R²: {r2:.4f}')

# Persist the fitted estimator and the fitted scaler next to the notebook.
# The second call is the cell's last expression, so its return value
# (the list of written file names) is echoed as the cell output.
joblib.dump(value=model, filename='wine_model.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')

Test MSE: 0.5467
Test R²: 0.2598


['scaler.pkl']

In [5]:
# Optional: Test Flask app locally (for evidence)
# Launches app.py as a child process, lets it serve for ~12 seconds, stops it,
# and reports whether it stayed up for the whole window.
import subprocess
import sys
import time

# sys.executable is the interpreter running this kernel, so app.py is started
# with the same installed packages as the notebook.
process = subprocess.Popen(
    [sys.executable, 'app.py'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
try:
    time.sleep(2)
    print("Flask app started locally (check logs for errors)")
    time.sleep(10)
    # None  -> the server is still running (healthy);
    # other -> the process exited on its own before we stopped it.
    exit_code = process.poll()
finally:
    # Always stop the child and drain its pipes, even if the cell is interrupted,
    # so no server process is left behind holding the port.
    process.terminate()
    stdout, stderr = process.communicate()

server_log = stderr.decode(errors='replace')
# The development server's normal startup banner arrives on stderr (see the
# recorded output of this cell), so non-empty stderr alone is not a failure;
# only an early, non-zero exit is reported as an error.
if exit_code not in (None, 0):
    print(f"Errors: {server_log}")
else:
    print("No errors detected")
    if server_log:
        print(f"Server log:\n{server_log}")

# Clear environment variables for security.
# pop(key, None) rather than del: the cell stays safe to re-run, and does not
# raise KeyError when a key was never set in this kernel.
os.environ.pop('AWS_ACCESS_KEY_ID', None)
os.environ.pop('AWS_SECRET_ACCESS_KEY', None)

Flask app started locally (check logs for errors)
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://169.254.255.2:5000
[33mPress CTRL+C to quit[0m

