# Lab-8: Neural Network Implementation
## Fuel Consumption Dataset Analysis and Prediction

### Objectives:
1. Load the Fuel Consumption dataset and perform exploratory data analysis (EDA)
2. Generate independent and dependent variables
3. Encode categorical variables and split dataset
4. Perform feature scaling using StandardScaler
5. Initialize Artificial Neural Network with 2 hidden layers
6. Create output layer and compile the network
7. Train the neural network with specified parameters


In [None]:
# Import necessary libraries for Neural Network implementation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: NumPy and TensorFlow are seeded with the same value
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report the TensorFlow version in use and confirm the imports completed
print("TensorFlow Version:", tf.__version__)
print("Libraries imported successfully!")


## Step 1: Load Dataset and Perform EDA


In [None]:
# Load the Fuel Consumption dataset (path is relative to the working directory)
df = pd.read_csv('Fuel_Consumption_2000-2022.csv')

# Display basic information about the dataset
print("Dataset Shape:", df.shape)
print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\nFirst 5 rows:")
print(df.head())
print("\nDataset Description:")
print(df.describe())


In [None]:
# Count of missing entries in each column
missing_per_column = df.isnull().sum()
print("Missing Values:")
print(missing_per_column)

# For every categorical column, report how many distinct values it holds
# and show up to ten of them as a sample
categorical_columns = ['MAKE', 'VEHICLE CLASS', 'TRANSMISSION', 'FUEL']
for col in categorical_columns:
    column_values = df[col]
    distinct_count = column_values.nunique()
    sample_values = column_values.unique()[:10]
    print(f"\nUnique values in {col}: {distinct_count}")
    print(f"Sample values: {sample_values}")

# Column dtypes as inferred by pandas
print("\nData Types:")
print(df.dtypes)


In [None]:
# Exploratory Data Analysis - Visualizations
# Six panels are drawn on one 2 x 3 grid. The target column and its axis label
# are named once because several panels share them.
emissions_values = df['EMISSIONS']
co2_label = 'CO2 Emissions (g/km)'

plt.figure(figsize=(15, 12))

# Panel 1: histogram of the target variable (EMISSIONS)
plt.subplot(2, 3, 1)
plt.hist(emissions_values, bins=50, alpha=0.7, color='skyblue')
plt.xlabel(co2_label)
plt.ylabel('Frequency')
plt.title('Distribution of CO2 Emissions')

# Panel 2: fuel consumption plotted against emissions
plt.subplot(2, 3, 2)
plt.scatter(df['FUEL CONSUMPTION'], emissions_values, alpha=0.5)
plt.xlabel('Fuel Consumption (L/100km)')
plt.ylabel(co2_label)
plt.title('Fuel Consumption vs CO2 Emissions')

# Panel 3: engine size plotted against emissions
plt.subplot(2, 3, 3)
plt.scatter(df['ENGINE SIZE'], emissions_values, alpha=0.5, color='orange')
plt.xlabel('Engine Size (L)')
plt.ylabel(co2_label)
plt.title('Engine Size vs CO2 Emissions')

# Panel 4: row counts for the ten most frequent vehicle classes
plt.subplot(2, 3, 4)
vehicle_counts = df['VEHICLE CLASS'].value_counts().head(10)
bar_positions = range(len(vehicle_counts))
plt.bar(bar_positions, vehicle_counts.values)
plt.xticks(bar_positions, vehicle_counts.index, rotation=45)
plt.ylabel('Count')
plt.title('Top 10 Vehicle Classes')

# Panel 5: share of rows per fuel type
plt.subplot(2, 3, 5)
fuel_counts = df['FUEL'].value_counts()
plt.pie(fuel_counts.values, labels=fuel_counts.index, autopct='%1.1f%%')
plt.title('Fuel Type Distribution')

# Panel 6: mean emissions for each cylinder count
plt.subplot(2, 3, 6)
cylinder_emissions = df.groupby('CYLINDERS')['EMISSIONS'].mean()
plt.bar(cylinder_emissions.index, cylinder_emissions.values, color='green', alpha=0.7)
plt.xlabel('Number of Cylinders')
plt.ylabel('Average CO2 Emissions (g/km)')
plt.title('Average Emissions by Cylinders')

plt.tight_layout()
plt.show()


In [None]:
# Correlation analysis
# Numerical columns that take part in the pairwise correlation matrix
numerical_cols = [
    'YEAR',
    'ENGINE SIZE',
    'CYLINDERS',
    'FUEL CONSUMPTION',
    'HWY (L/100 km)',
    'COMB (L/100 km)',
    'COMB (mpg)',
    'EMISSIONS',
]
correlation_matrix = df[numerical_cols].corr()

# Annotated heatmap with the colour scale centred on zero
heatmap_options = dict(annot=True, cmap='coolwarm', center=0,
                       square=True, linewidths=0.5)
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Correlation Matrix of Numerical Features')
plt.show()

# Rank every numerical column by its correlation with the target, highest first
emissions_correlations = correlation_matrix['EMISSIONS'].sort_values(ascending=False)
print("Correlation with EMISSIONS (target variable):")
print(emissions_correlations)


## Step 2: Generate Independent and Dependent Variables & Encode Categorical Variables


In [None]:
# Work on a copy so the raw DataFrame stays untouched, and discard MODEL
# (too specific to be useful; MAKE is kept)
columns_to_drop = ['MODEL']
df_processed = df.copy().drop(columns=columns_to_drop)

# Dependent variable: the EMISSIONS column as a NumPy array
target_column = 'EMISSIONS'
y = df_processed[target_column].values

# Summary statistics of the target, each formatted to two decimals
target_mean, target_std = y.mean(), y.std()
target_min, target_max = y.min(), y.max()
print(f"Target variable: {target_column}")
print(f"Target variable shape: {y.shape}")
print("Target variable statistics:")
print(f"Mean: {target_mean:.2f}, Std: {target_std:.2f}, Min: {target_min:.2f}, Max: {target_max:.2f}")

# Independent variables: every remaining column except the target
feature_columns = df_processed.columns.drop(target_column).tolist()
print(f"\nFeature columns: {feature_columns}")
print(f"Number of features: {len(feature_columns)}")


In [None]:
# Replace each categorical column with integer codes. One fitted LabelEncoder
# per column is kept in label_encoders so the mapping can be looked up later.
categorical_columns = ['MAKE', 'VEHICLE CLASS', 'TRANSMISSION', 'FUEL']
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    encoded_values = le.fit_transform(df_processed[col])
    df_processed[col] = encoded_values
    label_encoders[col] = le

    # Report the category count plus a few original and encoded values
    category_count = len(le.classes_)
    original_sample = le.classes_[:5]
    encoded_sample = df_processed[col].unique()[:5]
    print(f"Encoded {col}: {category_count} unique categories")
    print(f"Sample original values: {original_sample}")
    print(f"Sample encoded values: {encoded_sample}\n")

# Feature matrix (independent variables) as a NumPy array
X = df_processed[feature_columns].values

print(f"Feature matrix shape: {X.shape}")
print("Features after encoding:")
for i, col in enumerate(feature_columns):
    print(f"{i}: {col}")

# Preview the first five rows of the feature matrix with their column names
preview_frame = pd.DataFrame(X[:5], columns=feature_columns)
print("\nFirst 5 rows of processed features:")
print(preview_frame)
