# Week 3:

# a, Fuel Amount Prediction Using Linear Regression

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the fuel dataset from disk
# Replace 'fuel_data.csv' with the path to your CSV file
df = pd.read_csv("fuel_data.csv")

# Show the first rows as a quick sanity check of the loaded columns
print("Dataset Preview:", df.head(), sep="\n")

# Single predictor (distance traveled) and the target (fuel consumed)
X, y = df[['distance']], df['fuel']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows with mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Report the fitted line: fuel = intercept + slope * distance
print("Model Coefficients:")
print(f"Intercept: {model.intercept_}")
print(f"Slope: {model.coef_[0]}")

# Estimate fuel for one new distance; a one-row DataFrame keeps the feature
# name identical to the one the model was trained with
new_distance = pd.DataFrame({'distance': [150]})
predicted_fuel = model.predict(new_distance)
print(f"Predicted fuel for {new_distance.iloc[0, 0]} km: {predicted_fuel[0]:.2f} liters")

Dataset Preview:
   distance  fuel
0        10   0.8
1        20   1.6
2        30   2.4
3        40   3.2
4        50   4.0
Mean Squared Error: 0.0
Model Coefficients:
Intercept: 0.0
Slope: 0.08
Predicted fuel for 150 km: 12.00 liters


# b, Salary Prediction

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
df = pd.read_csv("salary_data.csv")

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# One-hot encode the categorical features. drop_first=True removes one level
# per column; that level becomes the baseline, represented by all-zero dummies.
categorical_cols = ['industry', 'location']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Features and target. Dummy columns are selected by prefix (not substring) so
# an unrelated column that merely contains 'industry_' or 'location_' is not
# picked up by accident.
dummy_cols = [col for col in df.columns if col.startswith(('industry_', 'location_'))]
X = df[['years_experience', 'qualification'] + dummy_cols]
y = df['salary']

# Split the data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Describe the new profile with raw (un-encoded) values, exactly like one row
# of the CSV, instead of hand-writing dummy column names.
new_profile = pd.DataFrame({
    'years_experience': [5],
    'qualification': [3],
    'industry': ['IT'],
    'location': ['CityB'],
})

# Encode the profile, then align it to the training columns:
#   - dummy columns present in training but absent here are filled with 0
#   - a dummy for the baseline level (dropped during training) is discarded,
#     leaving all zeros, which is how the baseline is encoded
# NOTE: a level never seen in training is also encoded as all zeros, i.e. it
# is treated like the baseline.
new_data = (
    pd.get_dummies(new_profile, columns=categorical_cols)
    .reindex(columns=X_train.columns, fill_value=0)
)

# Predict salary for the specific profile
predicted_salary = model.predict(new_data)
print(f"Predicted salary for the new profile: {predicted_salary[0]:.2f}")

Dataset Preview:
   years_experience  qualification   industry location  salary
0                 2              2         IT    CityA   50000
1                 5              3    Finance    CityB   80000
2                 3              2         IT    CityA   55000
3                 7              4    Finance    CityB   90000
4                 1              1  Marketing    CityC   45000
Mean Squared Error: 4674945.215485746
Predicted salary for the new profile: 72162.16


# c, Electricity Consumption Prediction

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
# Replace 'electricity_data.csv' with the path to your CSV file
df = pd.read_csv("electricity_data.csv")

# Display dataset preview
print("Dataset Preview:")
print(df.head())

# One-hot encode 'season'. drop_first=True removes one level, which becomes
# the baseline (represented by all-zero season dummies).
df = pd.get_dummies(df, columns=['season'], drop_first=True)

# Features and target (season dummies selected by prefix)
season_cols = [col for col in df.columns if col.startswith('season_')]
X = df[['household_size', 'num_appliances', 'daily_usage_hours'] + season_cols]
y = df['electricity_consumption']

# Split BEFORE scaling so that no statistic of the test rows leaks into the
# preprocessing step. The split indices depend only on the row count and the
# seed, so the same rows land in train/test as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: learn mean/std from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept so the name
# X_scaled remains available to any code that refers to it.
X_scaled = scaler.transform(X)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Describe the new profile with raw (un-encoded) values, exactly like one row
# of the CSV, instead of hand-writing dummy column names.
new_profile = pd.DataFrame({
    'household_size': [4],
    'num_appliances': [8],
    'daily_usage_hours': [6],
    'season': ['Winter'],
})

# Encode the profile and align it to the training feature columns: missing
# dummy columns are filled with 0 and a dummy for the dropped baseline level
# is discarded. A season never seen in training is therefore treated like the
# baseline.
new_data = (
    pd.get_dummies(new_profile, columns=['season'])
    .reindex(columns=X.columns, fill_value=0)
)

# Scale the new data with the statistics learned from the training rows
new_data_scaled = scaler.transform(new_data)

# Predict electricity consumption for the new profile
predicted_consumption = model.predict(new_data_scaled)
print(f"Predicted electricity consumption for the new profile: {predicted_consumption[0]:.2f}")

Dataset Preview:
   household_size  num_appliances  daily_usage_hours  season  \
0               3               5                  5  Winter   
1               4               8                  6  Summer   
2               2               3                  4  Spring   
3               5              10                  8  Winter   
4               3               4                  6  Summer   

   electricity_consumption  
0                      120  
1                      150  
2                       90  
3                      200  
4                      130  
Mean Squared Error: 258.47905060053984
Predicted electricity consumption for the new profile: 169.62
