# Ionosphere Classification using Naive Bayes

## Objective:
To classify radar returns as good or bad using the Naive Bayes classification algorithm.


In [None]:
!pip install snowflake-connector-python



In [None]:
# ===============================
# 1. IMPORT LIBRARIES
# ===============================
import os
from getpass import getpass

import numpy as np
import pandas as pd
import snowflake.connector
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [None]:
# Open the Snowflake session used to load the dataset.
#
# The password is never stored in the notebook: it is taken from the
# SNOWFLAKE_PASSWORD environment variable, or prompted for interactively when
# that variable is unset. The remaining settings can be overridden through
# SNOWFLAKE_* environment variables and otherwise fall back to the values this
# notebook has always used.
#
# NOTE: a password was previously committed in this cell in plain text; it
# should be treated as compromised and rotated.
conn = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER', 'SANTHIYA347'),
    password=os.environ.get('SNOWFLAKE_PASSWORD') or getpass('Snowflake password: '),
    account=os.environ.get('SNOWFLAKE_ACCOUNT', 'RTFFDMY-GG82011'),
    # Trailing space removed so the name matches the database in the query.
    database=os.environ.get('SNOWFLAKE_DATABASE', 'IONOSPHERE'),
    schema=os.environ.get('SNOWFLAKE_SCHEMA', 'PUBLIC'),
    warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
)

In [None]:
# Pull the whole Ionosphere table into a DataFrame.
query = 'SELECT * FROM "IONOSPHERE"."PUBLIC"."IONO_SPHERE"'
try:
    df = pd.read_sql(query, conn)
finally:
    # Release the Snowflake session even when the query raises, so a failed
    # run does not leave a connection open.
    conn.close()

# Last expression of the cell: rendered as a rich table by Jupyter.
df.head()


  df = pd.read_sql(query, conn)


   COLUMN_A  COLUMN_B  COLUMN_C  COLUMN_D  COLUMN_E  COLUMN_F  COLUMN_G  \
0      True     False   0.99539  -0.05889   0.85243   0.02306   0.83398   
1      True     False   1.00000  -0.18829   0.93035  -0.36156  -0.10868   
2      True     False   1.00000  -0.03365   1.00000   0.00485   1.00000   
3      True     False   1.00000  -0.45161   1.00000   1.00000   0.71216   
4      True     False   1.00000  -0.02401   0.94140   0.06531   0.92106   

   COLUMN_H  COLUMN_I  COLUMN_J  ...  COLUMN_Z  COLUMN_AA  COLUMN_AB  \
0  -0.37708   1.00000   0.03760  ...  -0.51171    0.41078   -0.46168   
1  -0.93597   1.00000  -0.04549  ...  -0.26569   -0.20468   -0.18401   
2  -0.12062   0.88965   0.01198  ...  -0.40220    0.58984   -0.22145   
3  -1.00000   0.00000   0.00000  ...   0.90695    0.51613    1.00000   
4  -0.23255   0.77152  -0.16399  ...  -0.65158    0.13290   -0.53206   

   COLUMN_AC  COLUMN_AD  COLUMN_AE  COLUMN_AF  COLUMN_AG  COLUMN_AH  COLUMN_AI  
0    0.21266   -0.34090    0.42267 

In [None]:
# Shape of dataset
print("Shape:", df.shape)

# Column info (df.info() prints directly to stdout)
df.info()

# Check missing values.
# Printed explicitly: Jupyter only auto-displays the LAST expression of a
# cell, so a bare `df.isnull().sum()` in the middle would be silently dropped.
missing_counts = df.isnull().sum()
print("\nTotal missing values:", int(missing_counts.sum()))
if missing_counts.any():
    print(missing_counts[missing_counts > 0])

# Statistical summary (displayed as the cell's output)
df.describe()



Shape: (351, 35)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 351 entries, 0 to 350
Data columns (total 35 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   COLUMN_A   351 non-null    bool   
 1   COLUMN_B   351 non-null    bool   
 2   COLUMN_C   351 non-null    float64
 3   COLUMN_D   351 non-null    float64
 4   COLUMN_E   351 non-null    float64
 5   COLUMN_F   351 non-null    float64
 6   COLUMN_G   351 non-null    float64
 7   COLUMN_H   351 non-null    float64
 8   COLUMN_I   351 non-null    float64
 9   COLUMN_J   351 non-null    float64
 10  COLUMN_K   351 non-null    float64
 11  COLUMN_L   351 non-null    float64
 12  COLUMN_M   351 non-null    float64
 13  COLUMN_N   351 non-null    float64
 14  COLUMN_O   351 non-null    float64
 15  COLUMN_P   351 non-null    float64
 16  COLUMN_Q   351 non-null    float64
 17  COLUMN_R   351 non-null    float64
 18  COLUMN_S   351 non-null    float64
 19  COLUMN_T   351 non-null    float6

Unnamed: 0,COLUMN_C,COLUMN_D,COLUMN_E,COLUMN_F,COLUMN_G,COLUMN_H,COLUMN_I,COLUMN_J,COLUMN_K,COLUMN_L,...,COLUMN_Y,COLUMN_Z,COLUMN_AA,COLUMN_AB,COLUMN_AC,COLUMN_AD,COLUMN_AE,COLUMN_AF,COLUMN_AG,COLUMN_AH
count,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,...,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0
mean,0.641342,0.044372,0.601068,0.115889,0.550095,0.11936,0.511848,0.181345,0.476183,0.15504,...,0.396135,-0.071187,0.541641,-0.069538,0.378445,-0.027907,0.352514,-0.003794,0.349364,0.01448
std,0.497708,0.441435,0.519862,0.46081,0.492654,0.52075,0.507066,0.483851,0.563496,0.494817,...,0.578451,0.508495,0.516205,0.550025,0.575886,0.507974,0.571483,0.513574,0.522663,0.468337
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,0.472135,-0.064735,0.41266,-0.024795,0.21131,-0.05484,0.08711,-0.048075,0.02112,-0.065265,...,0.0,-0.33239,0.286435,-0.443165,0.0,-0.236885,0.0,-0.242595,0.0,-0.16535
50%,0.87111,0.01631,0.8092,0.0228,0.72873,0.01471,0.68421,0.01829,0.66798,0.02825,...,0.55389,-0.01505,0.70824,-0.01769,0.49664,0.0,0.44277,0.0,0.40956,0.0
75%,1.0,0.194185,1.0,0.334655,0.96924,0.445675,0.95324,0.534195,0.957895,0.482375,...,0.90524,0.156765,0.999945,0.153535,0.883465,0.154075,0.85762,0.20012,0.813765,0.17166
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
# List every column name on one line (the last column is the class label).
print(list(df.columns))


['COLUMN_A', 'COLUMN_B', 'COLUMN_C', 'COLUMN_D', 'COLUMN_E', 'COLUMN_F', 'COLUMN_G', 'COLUMN_H', 'COLUMN_I', 'COLUMN_J', 'COLUMN_K', 'COLUMN_L', 'COLUMN_M', 'COLUMN_N', 'COLUMN_O', 'COLUMN_P', 'COLUMN_Q', 'COLUMN_R', 'COLUMN_S', 'COLUMN_T', 'COLUMN_U', 'COLUMN_V', 'COLUMN_W', 'COLUMN_X', 'COLUMN_Y', 'COLUMN_Z', 'COLUMN_AA', 'COLUMN_AB', 'COLUMN_AC', 'COLUMN_AD', 'COLUMN_AE', 'COLUMN_AF', 'COLUMN_AG', 'COLUMN_AH', 'COLUMN_AI']


In [None]:
# Convert class labels to numeric: 'g' -> 1, 'b' -> 0.
LABEL_MAP = {'g': 1, 'b': 0}

# Encode only while the column is not already fully numeric-encoded. Without
# this guard, re-running the cell would map the existing 1/0 values through
# LABEL_MAP again and turn every label into NaN.
if not df['COLUMN_AI'].isin(list(LABEL_MAP.values())).all():
    encoded = df['COLUMN_AI'].map(LABEL_MAP)

    # `.map` yields NaN for any value missing from LABEL_MAP; fail loudly
    # instead of passing NaN targets on to the classifier.
    unmapped = df.loc[encoded.isna(), 'COLUMN_AI'].unique()
    if len(unmapped):
        raise ValueError(
            f"Unexpected class labels in COLUMN_AI: {unmapped.tolist()}"
        )

    df['COLUMN_AI'] = encoded.astype('int64')

# Features & target
X = df.drop(columns='COLUMN_AI')
y = df['COLUMN_AI']


In [None]:
# Hold out part of the data for evaluation.
# `train_test_split` is already imported in the notebook's import cell.
TEST_SIZE = 0.2      # fraction of rows reserved for testing
RANDOM_STATE = 42    # fixed seed so the split is reproducible

# stratify=y keeps the good/bad class ratio the same in the train and test
# sets, which matters for a small, imbalanced dataset. Note that this selects
# different rows than an unstratified split, so metrics will differ slightly
# from runs made without it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)


In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
model = GaussianNB().fit(X_train, y_train)
model


In [None]:
# Predict class labels for the held-out test features.
y_pred = model.predict(X=X_test)


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute each evaluation metric once, comparing predictions to true labels.
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Report the results.
print("Accuracy:", acc)
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", report)


Accuracy: 0.8732394366197183

Confusion Matrix:
 [[21  7]
 [ 2 41]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.75      0.82        28
           1       0.85      0.95      0.90        43

    accuracy                           0.87        71
   macro avg       0.88      0.85      0.86        71
weighted avg       0.88      0.87      0.87        71



## Conclusion:
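The Gaussian Naive Bayes classifier performed well on the Ionosphere dataset, reaching about 87% accuracy on the 71-sample held-out test set in the recorded run.
It identified good returns more reliably (recall 0.95) than bad returns (recall 0.75).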
Due to its probabilistic nature and assumption of feature independence, Naive Bayes is well-suited
for high-dimensional datasets like Ionosphere.
