In [None]:
# Import the libraries
import boto3
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt

In [None]:
# Create the boto3 S3 client that later cells use to read and write objects
s3_client = boto3.client(service_name='s3')

In [None]:
# S3 locations used by this notebook.
# NOTE(review): the values look like placeholders -- replace them with real names.
file_key = 'yourdatajson'            # key of the JSON object that is read as input
bucket_name = 'yourfirehosebucket'   # bucket that holds the input JSON object
bucket_models = 'yourmodelsbucket'   # bucket that receives the model metadata


In [None]:
# Download the JSON object from the S3 bucket and parse it
response = s3_client.get_object(Key=file_key, Bucket=bucket_name)
content = response['Body']      # body object of the S3 response
raw_payload = content.read()    # read the whole object in one call
json_data = json.loads(raw_payload)

In [None]:
# Build a DataFrame from the parsed JSON and print its first five rows as a quick check
df = pd.DataFrame(data=json_data)
print(df.head(n=5))


In [None]:
# Parse the timestamp column into pandas datetime values
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

In [None]:
# Convert the timestamp column to whole seconds since the Unix epoch.
# Subtracting the epoch and floor-dividing by one second gives the same answer
# whatever resolution the datetimes are stored in; casting to int64 and dividing
# by 10**9 is only correct for nanosecond-resolution columns.
# Expects the column to already be datetime (see the previous cell); running this
# cell a second time fails loudly instead of silently dividing the values again.
timestamps = df['timestamp']
# pandas cannot subtract naive and timezone-aware datetimes from each other, so
# pick an epoch that matches the column: UTC-aware if the column has a timezone.
epoch = pd.Timestamp('1970-01-01', tz='UTC' if timestamps.dt.tz is not None else None)
df['timestamp'] = (timestamps - epoch) // pd.Timedelta(seconds=1)

In [None]:
# Columns selected from the data; the target column is listed here as well and
# is dropped again when the model inputs are built.
target = 'fire_intensity'
features = [
    'timestamp',
    'temperature',
    'humidity',
    'fire_intensity',
    'gas_concentration',
    'wind_speed',
    'distance',
]

In [None]:
# Model inputs: every listed column except the target. Model output: the target column.
X = df.loc[:, features].drop(target, axis=1)
y = df.loc[:, target]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fit a linear regression model on the training split
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out test rows
y_pred = model.predict(X=X_test)

In [None]:
# Score the predictions against the held-out targets and report both metrics
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}', f'R^2 Score: {r2}', sep='\n')

In [None]:
# Scatter plot of predicted against actual values, with a dashed y = x reference line
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.7)
ax.set_xlabel('Actual Fire Intensity')
ax.set_ylabel('Predicted Fire Intensity')
ax.set_title('Actual vs Predicted Fire Intensity')
# The reference line spans the range of the full target column, not only the test split
diagonal = [y.min(), y.max()]
ax.plot(diagonal, diagonal, 'k--', lw=2)
plt.show()


In [None]:
# Save model to pkl local file


In [None]:
# Upload model to S3 Model


In [None]:
# Optional: Add model metadata to S3
model_metadata = {
    'features': features,
    'target': target,
    'metrics': {
        'mse': mse,
        'r2': r2
    },
    'training_date': pd.Timestamp.now().isoformat()
}

metadata_key = f'models/metadata_{model_filename.replace(".pkl", ".json")}'
s3_client.put_object(
    Bucket=bucket_models,
    Key=metadata_key,
    Body=json.dumps(model_metadata),
    ContentType='application/json'
)
print(f'Model metadata uploaded to S3 bucket: {bucket_models}, key: {metadata_key}')