In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the daily sunspot CSV from the notebook's relative input directory.
data = pd.read_csv(
    filepath_or_buffer='../input/daily-sun-spot-data-1818-to-2019/sunspot_data.csv',
)

In [3]:
# Preview the first five rows of the loaded frame as a quick sanity check.
data.head(5)

In [4]:
# Print the frame summary (columns, dtypes, non-null counts) to spot missing values.
data.info()

In [5]:

from pandas_profiling import ProfileReport

# Build an explorative profiling report of the raw frame and embed it in the
# notebook output as an iframe.
profile = ProfileReport(
    data,
    title="Profiling Report Sun_Spots",
    explorative=True,
)
profile.to_notebook_iframe()


In [6]:
# List the column labels; 'Number of Sunspots' is used as the target further down.
data.columns

In [7]:
pip install sweetviz

In [8]:

import sweetviz as sv

# Sweetviz EDA report for the raw frame (labelled "Parameters" in the report),
# with 'Number of Sunspots' as the target feature; rendered inline.
my_report = sv.analyze(
    [data, "Parameters"],
    target_feat='Number of Sunspots',
)
my_report.show_notebook()


# ML for Number of Sunspots

In [9]:
# Work on an independent (deep) copy so the modelling cells never touch `data`.
data_11 = data.copy(deep=True)
data_11.info()

In [10]:
# Target: the 'Number of Sunspots' column.  Features: every other column.
Y = data_11['Number of Sunspots']
X = data_11.drop(columns=['Number of Sunspots'])

In [11]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split.  A fixed random_state makes the split, and every metric
# computed from it, reproducible across kernel restarts; 667 is the seed this
# notebook already uses for PyCaret (session_id) and XGBoost (random_state).
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=667
)

In [12]:

from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with scikit-learn's default hyper-parameters.
# NOTE(review): 'Number of Sunspots' is treated as a regression target later in
# this notebook (PyCaret regression, XGBRegressor) -- confirm that modelling
# each distinct value as a class is intended here.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Predictions on both partitions of the hold-out split.
y_train_pred = knn.predict(X_train)
y_test_pred = knn.predict(X_test)

# Accuracy, Matthews correlation coefficient and weighted F1 on the training set.
knn_train_accuracy, knn_train_mcc, knn_train_f1 = (
    accuracy_score(y_train, y_train_pred),
    matthews_corrcoef(y_train, y_train_pred),
    f1_score(y_train, y_train_pred, average='weighted'),
)

# The same three metrics on the held-out test set.
knn_test_accuracy, knn_test_mcc, knn_test_f1 = (
    accuracy_score(y_test, y_test_pred),
    matthews_corrcoef(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='weighted'),
)

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % knn_train_accuracy,
    '- MCC: %s' % knn_train_mcc,
    '- F1 score: %s' % knn_train_f1,
    '----------------------------------',
    'Model performance for Test set',
    '- Accuracy: %s' % knn_test_accuracy,
    '- MCC: %s' % knn_test_mcc,
    '- F1 score: %s' % knn_test_f1,
    sep='\n',
)


In [13]:

# Support vector machine (Radial basis function kernel)

from sklearn.svm import SVC

# SVC uses its default kernel here; gamma and C are fixed by hand.
# NOTE(review): X is passed exactly as it comes out of the DataFrame (no scaling
# step is visible in this notebook) -- confirm gamma=2 is sensible for
# unscaled features.
svm_rbf = SVC(gamma=2, C=1).fit(X_train, y_train)

# Predictions on both partitions of the hold-out split.
y_train_pred = svm_rbf.predict(X_train)
y_test_pred = svm_rbf.predict(X_test)

# Accuracy, Matthews correlation coefficient and weighted F1 on the training set.
svm_rbf_train_accuracy, svm_rbf_train_mcc, svm_rbf_train_f1 = (
    accuracy_score(y_train, y_train_pred),
    matthews_corrcoef(y_train, y_train_pred),
    f1_score(y_train, y_train_pred, average='weighted'),
)

# The same three metrics on the held-out test set.
svm_rbf_test_accuracy, svm_rbf_test_mcc, svm_rbf_test_f1 = (
    accuracy_score(y_test, y_test_pred),
    matthews_corrcoef(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='weighted'),
)

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % svm_rbf_train_accuracy,
    '- MCC: %s' % svm_rbf_train_mcc,
    '- F1 score: %s' % svm_rbf_train_f1,
    '----------------------------------',
    'Model performance for Test set',
    '- Accuracy: %s' % svm_rbf_test_accuracy,
    '- MCC: %s' % svm_rbf_test_mcc,
    '- F1 score: %s' % svm_rbf_test_f1,
    sep='\n',
)


In [14]:

# Decision tree
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree classifier fitted on the training partition.
dt = DecisionTreeClassifier(max_depth=5).fit(X_train, y_train)

# Predictions on both partitions of the hold-out split.
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

# Accuracy, Matthews correlation coefficient and weighted F1 on the training set.
dt_train_accuracy, dt_train_mcc, dt_train_f1 = (
    accuracy_score(y_train, y_train_pred),
    matthews_corrcoef(y_train, y_train_pred),
    f1_score(y_train, y_train_pred, average='weighted'),
)

# The same three metrics on the held-out test set.
dt_test_accuracy, dt_test_mcc, dt_test_f1 = (
    accuracy_score(y_test, y_test_pred),
    matthews_corrcoef(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='weighted'),
)

# One print call, one report line per argument.
print(
    'Model performance for Training set',
    '- Accuracy: %s' % dt_train_accuracy,
    '- MCC: %s' % dt_train_mcc,
    '- F1 score: %s' % dt_train_f1,
    '----------------------------------',
    'Model performance for Test set',
    '- Accuracy: %s' % dt_test_accuracy,
    '- MCC: %s' % dt_test_mcc,
    '- F1 score: %s' % dt_test_f1,
    sep='\n',
)


In [15]:
pip install git+https://github.com/pycaret/pycaret.git

In [16]:
from pycaret.regression import setup, compare_models, predict_model

# Initialise the PyCaret regression experiment on the raw frame with
# 'Number of Sunspots' as the target; session_id fixes the seed.
pycaret_automl = setup(
    data=data,
    target='Number of Sunspots',
    session_id=667,
)

In [17]:
# Let PyCaret train its candidate regressors, ranked by MSE, and keep the result.
# NOTE(review): budget_time is presumably a wall-clock cap in minutes -- confirm
# against the PyCaret documentation.
pycaret_models = compare_models(sort='MSE', budget_time=5)

In [18]:
# Print the plain-text repr of whatever compare_models returned.
print(pycaret_models)

In [19]:
from xgboost import XGBRegressor

In [20]:
# XGBoost regressor with a fully spelled-out hyper-parameter set.
# NOTE(review): the values look like a copied estimator repr (presumably the
# model PyCaret selected above) -- confirm their origin.
# `gpu_id=-1` and `predictor='auto'` are intentionally not passed: both were the
# library defaults, and XGBoost 2.x deprecated them in favour of `device`, so
# passing them only triggers warnings on current releases.
model_1 = XGBRegressor(
    base_score=0.5, booster='gbtree', callbacks=None,
    colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
    early_stopping_rounds=None, enable_categorical=False,
    eval_metric=None, gamma=0, grow_policy='depthwise',
    importance_type=None, interaction_constraints='',
    learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
    max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
    monotone_constraints='()', n_estimators=100,
    n_jobs=-1, num_parallel_tree=1, random_state=667,
    reg_alpha=0, reg_lambda=1,
)

In [21]:
# Fit the XGBoost regressor on the training partition of the hold-out split.
model_1.fit(X_train,y_train)

In [22]:
# Score the fitted regressor on the held-out test partition
# (for scikit-learn-style regressors `score` reports R^2).
model_1.score(X_test,y_test)

In [23]:
# Predict over the full feature frame and plot predictions next to the actuals.
# NOTE(review): X contains the rows model_1 was trained on, so this plot mixes
# in-sample and out-of-sample predictions.
y_pred = model_1.predict(X)
model_1_compare = pd.DataFrame({
    'actual': data['Number of Sunspots'],
    'predicted': y_pred,
})
model_1_compare.plot(figsize=(20, 12))

In [25]:
pip install autogluon

In [28]:
from autogluon.tabular import TabularPredictor

# AutoGluon tabular regression on 'Number of Sunspots', optimised for MSE.
# NOTE(review): the predictor is fitted on the complete `data` frame, so no
# rows are held out from it.
autogluon_automl = TabularPredictor(
    label='Number of Sunspots',
    problem_type='regression',
    eval_metric='mean_squared_error',
)
# time_limit is presumably in seconds (5 * 60 = five minutes) -- confirm.
autogluon_automl.fit(train_data=data, time_limit=5 * 60)

In [29]:
# Show the leaderboard of the models AutoGluon trained during fit.
autogluon_automl.leaderboard()

In [30]:
# Evaluate the fitted predictor on data_11.
# NOTE(review): data_11 is a copy of `data`, the same frame the predictor was
# fitted on, so this is an in-sample score rather than a hold-out estimate.
autogluon_automl.evaluate(data_11)

In [31]:
# Predict over the full frame with AutoGluon and plot predictions next to the
# actuals.  NOTE(review): these are predictions on the data used for fitting.
y_pred = autogluon_automl.predict(data)
autogluon_compare = pd.DataFrame({
    'actual': data['Number of Sunspots'],
    'predicted': y_pred,
})
autogluon_compare.plot(figsize=(15, 12))