# Deep Learning
This notebook demonstrates how to use deep learning (DL) techniques to predict whether an atmosphere is present or not.
The model is a deep neural network.


### Load Dataset

In [1]:
# Download the ionosphere dataset and save it locally as data.csv
!wget -O data.csv https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv


--2024-01-08 18:15:16--  https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 76466 (75K) [text/plain]
Saving to: ‘data.csv’


2024-01-08 18:15:16 (5.02 MB/s) - ‘data.csv’ saved [76466/76466]



In [2]:
# Import all required libraries
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the dataset into a DataFrame.
# The CSV file has no header row, so pass header=None to make sure the
# first data row is not treated as column names.
df = pd.read_csv('data.csv', header=None)

### Data Preprocessing

In [4]:
# Preview the first few rows to see what the raw data looks like
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.0,0.0376,...,-0.51171,0.41078,-0.46168,0.21266,-0.3409,0.42267,-0.54487,0.18641,-0.453,g
1,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
2,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
3,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
4,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g


In [5]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,32,33
count,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,...,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0,351.0
mean,0.891738,0.0,0.641342,0.044372,0.601068,0.115889,0.550095,0.11936,0.511848,0.181345,...,0.396135,-0.071187,0.541641,-0.069538,0.378445,-0.027907,0.352514,-0.003794,0.349364,0.01448
std,0.311155,0.0,0.497708,0.441435,0.519862,0.46081,0.492654,0.52075,0.507066,0.483851,...,0.578451,0.508495,0.516205,0.550025,0.575886,0.507974,0.571483,0.513574,0.522663,0.468337
min,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,1.0,0.0,0.472135,-0.064735,0.41266,-0.024795,0.21131,-0.05484,0.08711,-0.048075,...,0.0,-0.33239,0.286435,-0.443165,0.0,-0.236885,0.0,-0.242595,0.0,-0.16535
50%,1.0,0.0,0.87111,0.01631,0.8092,0.0228,0.72873,0.01471,0.68421,0.01829,...,0.55389,-0.01505,0.70824,-0.01769,0.49664,0.0,0.44277,0.0,0.40956,0.0
75%,1.0,0.0,1.0,0.194185,1.0,0.334655,0.96924,0.445675,0.95324,0.534195,...,0.90524,0.156765,0.999945,0.153535,0.883465,0.154075,0.85762,0.20012,0.813765,0.17166
max,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Let's explore the data

In [6]:
# Basic structure of the frame: dimensions, container type and per-column dtypes
print(f'The Shape : {df.shape}')
print(f'Type of df: {type(df)}')
print(f'Column Types : \n {df.dtypes}')

The Shape : (351, 35)
Type of df: <class 'pandas.core.frame.DataFrame'>
Column Types : 
 0       int64
1       int64
2     float64
3     float64
4     float64
5     float64
6     float64
7     float64
8     float64
9     float64
10    float64
11    float64
12    float64
13    float64
14    float64
15    float64
16    float64
17    float64
18    float64
19    float64
20    float64
21    float64
22    float64
23    float64
24    float64
25    float64
26    float64
27    float64
28    float64
29    float64
30    float64
31    float64
32    float64
33    float64
34     object
dtype: object


In [7]:
# Count the missing (null) values in each column
df.isnull().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
dtype: int64

In [8]:
# Which distinct values does the label column (34) contain?
df[34].unique()

array(['g', 'b'], dtype=object)

In [9]:
# Is this a balanced dataset? Count the rows for each label value
df.groupby(34).size()

34
b    126
g    225
dtype: int64

*This is not a balanced dataset*

In [10]:
# Features: every column except the last one
X = df.iloc[:,:-1]
# Labels: the last column
y = df.iloc[:,-1]

In [11]:
# Encode the string labels as integers.
# LabelEncoder assigns codes in sorted class order, so 'b' -> 0 and 'g' -> 1.
# Note that y is rebound here from the label column to the encoded array.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [12]:
# Sanity check: number of encoded labels
y.size

351

Split the extracted features and labels into training and test sets

In [13]:
# Split the dataset into train and test sets.
# test_size=0.33 holds out roughly a third of the rows; random_state fixes the
# shuffle so the split is reproducible.
# NOTE(review): the classes are imbalanced (126 'b' vs 225 'g'); consider
# passing stratify=y so both splits keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=34)

# Report the resulting shapes to confirm the split sizes
print(f' X_train shape {X_train.shape}')
print(f' X_test shape {X_test.shape}')
print(f' y_train shape {y_train.shape}')
print(f' y_test shape {y_test.shape}')

 X_train shape (235, 34)
 X_test shape (116, 34)
 y_train shape (235,)
 y_test shape (116,)


### Model Creation

In [14]:
from sklearn.neural_network import MLPClassifier

In [15]:
# Multi-layer perceptron classifier with a single hidden layer of 10 units.
# hidden_layer_sizes is written as a one-element tuple: `(10)` is just the
# integer 10 in Python, whereas `(10,)` states the one-layer intent explicitly.
model = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation = 'relu',
    random_state = 100,  # fixed seed so training is reproducible
    solver = 'adam',
)



Fit the model on the training data and predict on the test data

In [16]:
# Fit the model on the training split
model.fit(X_train,y_train)

# Predict the labels of the held-out test split
y_pred = model.predict(X_test)



Evaluate the model

In [17]:
# Mean accuracy on the held-out test set, as reported by the estimator itself
score = model.score(X_test,y_test)
print(f'The model score is {score:.2f}')

# Confusion matrix: rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_test,y_pred)
print(f'The confusion matrix is :\n {cm}')

# Accuracy computed directly from the stored predictions
accuracy = accuracy_score(y_test,y_pred)
print(f'accuracy is : {accuracy:.2f}')

# Per-class precision, recall and F1-score
cr = classification_report(y_test,y_pred)
print(f'The classification report is :\n{cr}')

The model score is 0.90
The confusion matrix is :
 [[23 11]
 [ 1 81]]
accuracy is : 0.90
The classification report is :
              precision    recall  f1-score   support

           0       0.96      0.68      0.79        34
           1       0.88      0.99      0.93        82

    accuracy                           0.90       116
   macro avg       0.92      0.83      0.86       116
weighted avg       0.90      0.90      0.89       116

