# Importing the initial libraries

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [37]:
raw_data = pd.read_csv('solar-flares-dataset.csv')
raw_data

Unnamed: 0,modified Zurich class,largest spot size,spot distribution,Activity,Evolution,Previous 24 hour flare activity code,Historically-complex,Did region become historically complex on this pass across the sun's disk,Area,. Area of the largest spot,C-Class Flares,M-class flares,X-class flares
0,H,A,X,1,3,1,1,1,1,1,0,0,0
1,D,R,O,1,3,1,1,2,1,1,0,0,0
2,C,S,O,1,3,1,1,2,1,1,0,0,0
3,H,R,X,1,2,1,1,1,1,1,0,0,0
4,H,S,X,1,1,1,1,2,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1061,H,S,X,1,2,1,1,1,1,1,0,0,0
1062,H,S,X,2,2,1,1,2,1,1,0,0,0
1063,C,S,O,1,2,1,2,2,1,1,0,0,0
1064,H,R,X,1,2,1,1,2,1,1,0,0,0


In [3]:
raw_data.columns

Index(['modified Zurich class', 'largest spot size', 'spot distribution',
       ' Activity', 'Evolution ', 'Previous 24 hour flare activity code ',
       'Historically-complex',
       'Did region become historically complex on this pass across the sun's disk',
       'Area', '. Area of the largest spot ', 'C-Class Flares',
       'M-class flares ', 'X-class flares'],
      dtype='object')

In [4]:
raw_data['C-Class Flares'].unique()

array([0, 1, 5, 2, 3, 8, 4, 6], dtype=int64)

In [5]:
raw_data.iloc[:, -2].unique()

array([0, 1, 4, 3, 2, 5], dtype=int64)

In [6]:
np.sum(raw_data.isnull().any()) # checking for any null values

0

In [7]:
raw_data.dtypes

modified Zurich class                                                        object
largest spot size                                                            object
spot distribution                                                            object
 Activity                                                                     int64
Evolution                                                                     int64
Previous 24 hour flare activity code                                          int64
Historically-complex                                                          int64
Did region become historically complex on this pass across the sun's disk     int64
Area                                                                          int64
. Area of the largest spot                                                    int64
C-Class Flares                                                                int64
M-class flares                                                              

In [8]:
for col in raw_data:
    print(col, np.unique(raw_data[col].values), '\n')

modified Zurich class ['B' 'C' 'D' 'E' 'F' 'H'] 

largest spot size ['A' 'H' 'K' 'R' 'S' 'X'] 

spot distribution ['C' 'I' 'O' 'X'] 

 Activity [1 2] 

Evolution  [1 2 3] 

Previous 24 hour flare activity code  [1 2 3] 

Historically-complex [1 2] 

Did region become historically complex on this pass across the sun's disk [1 2] 

Area [1 2] 

. Area of the largest spot  [1] 

C-Class Flares [0 1 2 3 4 5 6 8] 

M-class flares  [0 1 2 3 4 5] 

X-class flares [0 1 2] 



# Splitting the data into train and test

In [9]:
from sklearn.model_selection import train_test_split

x = raw_data.iloc[:, 0:-3]
y = raw_data.loc[:, ['C-Class Flares']]

In [36]:
y.head()

Unnamed: 0,C-Class Flares
0,0
1,0
2,0
3,0
4,0


In [11]:
print("x shape:", x.shape, "| y shape:", y.shape)

x shape: (1066, 10) | y shape: (1066, 1)


In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.8, random_state = 5)

In [13]:
x_train.head()

Unnamed: 0,modified Zurich class,largest spot size,spot distribution,Activity,Evolution,Previous 24 hour flare activity code,Historically-complex,Did region become historically complex on this pass across the sun's disk,Area,. Area of the largest spot
129,E,S,O,1,1,1,2,2,1,1
648,E,S,O,1,3,1,1,2,1,1
213,H,S,X,1,1,1,1,2,1,1
292,H,R,X,1,1,1,1,2,1,1
667,B,X,O,1,3,1,1,2,1,1


In [14]:
for col in x_train:
    print(col, np.unique(x_train[col].values), '\n')

modified Zurich class ['B' 'C' 'D' 'E' 'F' 'H'] 

largest spot size ['A' 'H' 'K' 'R' 'S' 'X'] 

spot distribution ['C' 'I' 'O' 'X'] 

 Activity [1 2] 

Evolution  [1 2 3] 

Previous 24 hour flare activity code  [1 2 3] 

Historically-complex [1 2] 

Did region become historically complex on this pass across the sun's disk [1 2] 

Area [1 2] 

. Area of the largest spot  [1] 



In [15]:
for col in x_test:
    print(col, np.unique(x_test[col].values), '\n')

modified Zurich class ['B' 'C' 'D' 'E' 'F' 'H'] 

largest spot size ['A' 'H' 'K' 'R' 'S' 'X'] 

spot distribution ['C' 'I' 'O' 'X'] 

 Activity [1 2] 

Evolution  [1 2 3] 

Previous 24 hour flare activity code  [1 2 3] 

Historically-complex [1 2] 

Did region become historically complex on this pass across the sun's disk [1 2] 

Area [1 2] 

. Area of the largest spot  [1] 



In [16]:
from sklearn.preprocessing import OrdinalEncoder

enc_all = OrdinalEncoder()
x_train_cat = x_train.copy().iloc[:, 0:3]
enc_all.fit(x_train_cat)
enc_all.categories_

[array(['B', 'C', 'D', 'E', 'F', 'H'], dtype=object),
 array(['A', 'H', 'K', 'R', 'S', 'X'], dtype=object),
 array(['C', 'I', 'O', 'X'], dtype=object)]

In [17]:
x_train_cat_enc = enc_all.transform(x_train_cat)

In [18]:
x_train_cat_enc

array([[3., 4., 2.],
       [3., 4., 2.],
       [5., 4., 3.],
       ...,
       [3., 2., 0.],
       [4., 2., 1.],
       [1., 4., 2.]])

In [34]:
x_train.loc[:, ['modified Zurich class']] = x_train_cat_enc[:, 0]
x_train.loc[:, ['largest spot size']] = x_train_cat_enc[:, 1]
x_train.loc[:, ['spot distribution']] = x_train_cat_enc[:, 2]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [20]:
x_test_cat = x_test.copy().iloc[:, 0:3]
x_test_cat_enc = enc_all.transform(x_test_cat)

In [21]:
x_test.loc[:, ['modified Zurich class']] = x_test_cat_enc[:, 0]
x_test.loc[:, ['largest spot size']] = x_test_cat_enc[:, 1]
x_test.loc[:, ['spot distribution']] = x_test_cat_enc[:, 2]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [22]:
print('x_train shape', x_train.shape)
print('x_test shape', x_test.shape)

x_train shape (714, 10)
x_test shape (352, 10)


In [23]:
x_test.head()

Unnamed: 0,modified Zurich class,largest spot size,spot distribution,Activity,Evolution,Previous 24 hour flare activity code,Historically-complex,Did region become historically complex on this pass across the sun's disk,Area,. Area of the largest spot
331,1.0,3.0,2.0,1,3,1,1,2,1,1
467,0.0,5.0,2.0,1,2,1,1,2,1,1
533,1.0,0.0,1.0,2,3,1,2,2,1,1
60,2.0,0.0,1.0,2,2,1,2,2,1,1
71,2.0,0.0,2.0,1,2,1,2,2,1,1


In [24]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver = 'lbfgs', alpha = 1e-5, hidden_layer_sizes = (5, 2), random_state = 1)

In [32]:
y_train.values.ravel()

array([0, 6, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 3, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 2, 0,
       0, 8, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,

In [33]:
clf.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(5, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [27]:
y_pred = clf.predict(x_test)

In [28]:
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [29]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_pred)

0.8352272727272727