# Fire Classification Week 1 Notebook

This notebook analyzes MODIS fire data from **2021–2023** for India.  
We perform:
- Dataset loading & merging
- Basic cleaning (duplicates)
- Exploratory Data Analysis (EDA)
- Visualization of fire types and confidence values


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Show plots inline
%matplotlib inline

In [None]:
# Read the three yearly MODIS extracts (2021, 2022, 2023), one frame per file
yearly_paths = (
    'modis_2021_India.csv',
    'modis_2022_India.csv',
    'modis_2023_India.csv',
)
df1, df2, df3 = [pd.read_csv(path) for path in yearly_paths]

# Stack the yearly frames on a fresh integer index, then discard rows that
# are exact repeats of an earlier row
df = pd.concat([df1, df2, df3], ignore_index=True).drop_duplicates()

print('Data Shape:', df.shape)
df.head()

In [None]:
# Column dtypes and non-null counts (info() prints its report directly)
df.info()

# Summary statistics of the numerical columns.
# A notebook cell only renders its *last* expression automatically, so this
# intermediate result is passed to display() (provided by the IPython kernel)
# instead of being silently discarded.
display(df.describe())

# Number of rows per fire type; as the final expression it is rendered as
# the cell's output
df['type'].value_counts()

In [None]:
# Bar chart: number of rows for each value of the `type` column
fig_type, ax_type = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='type', ax=ax_type)
ax_type.set_title('Fire Type Distribution')
plt.show()

# Histogram of the `confidence` column (20 bins) with a KDE curve overlaid
fig_conf, ax_conf = plt.subplots(figsize=(6, 4))
sns.histplot(df['confidence'], bins=20, kde=True, ax=ax_conf)
ax_conf.set_title('Confidence Distribution')
plt.show()

# Observations

- The dataset has **271217 rows and 15 columns**.
- Fire type distribution: {0: 257625, 2: 13550, 3: 42}.
- No missing values found in the dataset.
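- `type 0` (vegetation fires) dominates with about 95% of the rows, while type 2 (about 5%) and type 3 (42 rows, under 0.1%) are rare, so the classes are heavily imbalanced.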
- Confidence values range from 0 to 100, with the distribution peaking at values of 50 and above.


In [None]:
# Week 2 - Fire Type Classification (Till SMOTE)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE

# Load Dataset
# NOTE: this rebinds `df`; any frame previously stored under that name is
# no longer reachable through it.
df = pd.read_csv("https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/Fire.csv")
# Only the last expression of a cell is rendered automatically, so every
# intermediate preview in this cell goes through display() (provided by the
# IPython kernel); as bare expressions they would produce no output.
display(df.head())

# Basic Info
df.info()                   # prints dtypes and non-null counts directly
display(df.isnull().sum())  # missing-value count per column

# Remove duplicates and drop unnecessary columns
df.drop_duplicates(inplace=True)
df.drop(columns=["Unnamed: 0", "country", "state", "month", "day", "year"], inplace=True)
display(df.head())

# Encode Target
# LabelEncoder replaces the values of `fire_type` with integer codes; `le`
# is kept so the codes can be mapped back with le.inverse_transform().
le = LabelEncoder()
df['fire_type'] = le.fit_transform(df['fire_type'])
display(df['fire_type'].value_counts())

# Correlation Heatmap
# Correlate numeric/bool columns only: recent pandas versions no longer drop
# non-numeric columns silently in DataFrame.corr(), so a leftover text column
# would make the call fail. When every column is already numeric this
# selects the whole frame and the result is unchanged.
corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()

plt.figure(figsize=(10,8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

# Features and Target
X = df.drop("fire_type", axis=1)
y = df["fire_type"]

# Split Dataset
# stratify=y keeps each class's proportion the same in the train and test
# parts, so a rare class cannot end up (almost) entirely on one side of a
# purely random split. Stratification needs at least two rows per class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Scaling
# The scaler is fitted on the training part only and then applied to the
# test part, so no test-set statistics leak into the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SMOTE (Balancing the Data)
# Oversampling is applied to the training data only; the test part is left
# untouched.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# Confirm class distribution after SMOTE
print(y_train_resampled.value_counts())
