In [1]:
import warnings

# Install a catch-all "ignore" filter so no warning of any category is shown
# for the rest of the session. NOTE(review): this also hides deprecation and
# numerical warnings from pandas / scikit-learn; narrow the category if those
# should remain visible.
warnings.simplefilter("ignore")

In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


In [3]:

# Step 1: Load the weather observations from "weather_data.csv" into `df`
df = pd.read_csv("weather_data.csv")

# Step 2: Report the dataset dimensions as a (rows, columns) tuple
print("Number of rows and columns:", (len(df.index), len(df.columns)))

# Step 3: Preview the leading rows; the bare trailing expression is what the
# notebook renders as this cell's output
print("First few rows of the dataset:")
df.head(n=5)

Number of rows and columns: (49, 3)
First few rows of the dataset:


Unnamed: 0,hours_sunlight,humidity_level,daily_temperature
0,10.5,65,22.3
1,9.2,70,21.0
2,7.8,80,18.5
3,6.4,90,17.2
4,8.1,75,19.4


In [4]:
# Step 4: Count missing entries per column (a column of zeros means the data is complete)
print("Missing values in the dataset:")
df.isnull().sum()

Missing values in the dataset:


hours_sunlight       0
humidity_level       0
daily_temperature    0
dtype: int64

In [5]:
# Step 1: Choose the predictor columns and the response column for the model
features = ['hours_sunlight', 'humidity_level']
X, y = df[features], df['daily_temperature']

# Step 2: Hold out 30% of the rows as a test set; the fixed random_state makes
# the shuffle, and therefore the split, repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Step 3: Build an ordinary least-squares regressor and fit it on the training
# split (fit() returns the estimator itself, so construction and fitting chain)
model = LinearRegression().fit(X_train, y_train)

# Step 4: Show the learned parameters: one coefficient per feature, in the
# same order as the columns of X_train, followed by the intercept
print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)

Model coefficients: [ 1.25083729 -0.02763612]
Model intercept: 11.51100793541826


In [7]:
# Step 1: Predict the target for the held-out test rows
y_pred = model.predict(X_test)

# Step 2: Score the predictions against the true test values using
# Mean Squared Error (MSE) and the coefficient of determination (R2)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [8]:
# Step 3: Report the two evaluation metrics computed on the test split
print("Mean Squared Error (MSE):", mse)
print("R-squared (R2):", r2)

# Step 4: Put the first five true values next to the first five predictions.
# The slice is positional, and converting to a plain array drops y_test's
# original row labels so both columns share a fresh 0..4 index.
print("First few actual vs. predicted values:")
comparison_df = pd.DataFrame(
    {
        'Actual': y_test.iloc[:5].to_numpy(),
        'Predicted': y_pred[:5],
    }
)
print(comparison_df)

Mean Squared Error (MSE): 0.11488330185581327
R-squared (R2): 0.9833806480142233
First few actual vs. predicted values:
   Actual  Predicted
0    18.7  18.736670
1    17.0  16.944765
2    21.3  21.334350
3    23.9  23.431695
4    19.6  19.847885
