In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
# Directories that contain no files contribute nothing to the output.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('/kaggle/input/electric-vehicle-charging-patterns/ev_charging_patterns.csv')

# Title
print("⚡ Electric Vehicle Charging Patterns: Comprehensive EDA")

# --- Summary & Insights ---
print("\n📊 Summary & Insights")

# Summary statistics
print("\n1. 📑 Summary Statistics")
print(data.describe())

# Correlation matrix
print("\n2. 🔗 Correlation Heatmap")
# Select every numeric dtype (not only float64/int64) so columns parsed with a
# different width are not silently left out of the heatmap.
numeric_data = data.select_dtypes(include='number')
# Missing values are intentionally NOT zero-filled: DataFrame.corr() already
# excludes NaNs pair-by-pair, while substituting 0 would inject fabricated
# observations and distort the reported coefficients.
corr_matrix = numeric_data.corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
plt.show()

# Top Charger Types
print("\n3. 🔌 Top Charger Types by Usage")
# Frequency of each charger type, most common first (value_counts sorts descending).
charger_counts = data['Charger Type'].value_counts()
# Draw on an explicitly created Axes, like the other plots in this script,
# instead of relying on pyplot's implicit "current axes". On backends where
# plt.show() does not close the previous figure, the implicit form would draw
# the bars onto the earlier heatmap figure.
fig, ax = plt.subplots()
charger_counts.plot(kind='bar', ax=ax)
ax.set_title("Top Charger Types by Usage")
ax.set_ylabel("Frequency")
plt.show()

# --- Visualizations ---
print("\n📈 Detailed Visualizations")

# Charging Cost Distribution: histogram of the per-session cost column.
print("\n1. 💰 Distribution of Charging Cost")
bins = 20  # Number of histogram bins; change this to adjust the resolution
charging_cost = data['Charging Cost (USD)']
fig, ax = plt.subplots()
ax.hist(
    charging_cost,
    bins=bins,
    color='skyblue',
    edgecolor='black',
)
ax.set(
    title="Histogram of Charging Cost",
    xlabel="Charging Cost (USD)",
    ylabel="Frequency",
)
plt.show()

# Energy Consumed vs Charging Duration: one point per row, coloured by charger type.
print("\n2. ⚙️ Energy Consumed vs Charging Duration")
fig, ax = plt.subplots()
sns.scatterplot(
    x='Charging Duration (hours)',
    y='Energy Consumed (kWh)',
    hue='Charger Type',
    data=data,
    palette='viridis',
    ax=ax,
)
ax.set(
    title="Energy Consumed vs Charging Duration",
    xlabel="Charging Duration (hours)",
    ylabel="Energy Consumed (kWh)",
)
plt.show()

# Boxplot of Charging Rate by Charger Type: one box per charger type.
print("\n3. 📦 Boxplot of Charging Rate by Charger Type")
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='Charger Type', y='Charging Rate (kW)', data=data, ax=ax)
# Tilt the category labels and right-align them under their ticks.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_horizontalalignment('right')
plt.show()

# Time of Day Analysis
print("\n4. ⏰ Charging Sessions by Time of Day")
# Number of sessions recorded for each time-of-day category.
time_of_day_counts = data['Time of Day'].value_counts()
# Use an explicit Axes rather than pyplot's implicit current axes, matching
# the other plots in this script and guaranteeing the bars are drawn on a
# fresh figure even when plt.show() leaves the previous figure open.
fig, ax = plt.subplots()
time_of_day_counts.plot(kind='bar', ax=ax)
ax.set_title("Charging Sessions by Time of Day")
ax.set_ylabel("Frequency")
plt.show()

# --- Prediction ---
# Fits a linear regression that predicts the energy consumed in a charging
# session from four numeric session attributes, reports hold-out metrics and
# prints one example prediction.
print("\n🔮 Predicting Energy Consumed")

# Preparing data for prediction
features = ['Charging Duration (hours)', 'Charging Rate (kW)', 'Temperature (°C)', 'Vehicle Age (years)']
target = 'Energy Consumed (kWh)'

# Validate the schema BEFORE reading any column, so a missing column yields
# the friendly message below instead of an uncaught KeyError.
missing_features = [col for col in features if col not in data.columns]
if missing_features:
    print(f"Missing columns for prediction: {', '.join(missing_features)}")
elif target not in data.columns:
    print(f"Target column '{target}' not found in the dataset.")
else:
    # Rows without a target carry no supervision signal. Filling them with the
    # column mean would fabricate labels that end up in both the training and
    # the test set, so they are dropped instead. `data` itself is left untouched.
    target_is_missing = data[target].isnull()
    if target_is_missing.any():
        print(
            f"Missing values found in the target column '{target}'. "
            f"Dropping {int(target_is_missing.sum())} row(s) without a target."
        )
    model_data = data.loc[~target_is_missing, features + [target]]

    X = model_data[features]
    y = model_data[target]

    # Splitting the dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fill missing feature values with medians learned from the training split
    # only: 0 is a genuine temperature and an implausible charging rate, and
    # using training-only statistics keeps test information out of the model.
    # A feature that is entirely missing in the training split falls back to 0.
    fill_values = X_train.median().fillna(0)
    X_train = X_train.fillna(fill_values)
    X_test = X_test.fillna(fill_values)

    # Model training
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\n📊 Model Performance")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"R-squared: {r2:.2f}")

    # Example input for prediction: every feature set to 1.0. The frame keeps
    # the training column names and order so the model accepts it as-is.
    input_data = {feature: 1.0 for feature in features}
    input_df = pd.DataFrame([input_data])

    prediction = model.predict(input_df)[0]
    print(f"\nPredicted Energy Consumed: {prediction:.2f} kWh")

# Conclusion: closing banner followed by the sign-off message.
print(
    "\n📚 Conclusion",
    "Thank you for exploring Electric Vehicle Charging Patterns. We hope these insights and predictions help in better understanding EV charging behaviors.",
    sep="\n",
)
