In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn import datasets
from sklearn import model_selection
from sklearn import preprocessing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.metrics import mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Reproducibility setup: seed every random number generator the notebook uses
# and keep TensorFlow on the CPU with single-threaded op execution.
seed_value = 7

import os
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so assigning it
# here does not change hashing in the already-running kernel; it is only
# inherited by subprocesses. Export it before launching Jupyter if hash
# determinism of this kernel matters.
os.environ['PYTHONHASHSEED']=str(seed_value)
# Hide all CUDA devices so TensorFlow runs on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import random
random.seed(seed_value)

np.random.seed(seed_value)

import tensorflow as tf
tf.random.set_seed(seed_value)

import keras.backend as K

# Limit TensorFlow to one thread within an op and one thread across ops.
# This uses the TF2 configuration API (the TF1 Session-based configuration is
# not consulted by eager Keras execution). It has to run before TensorFlow
# executes its first op; re-running the cell with the same values is a no-op.
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)

In [None]:
# Load the dataset from a CSV file resolved relative to the working directory.
data = pd.read_csv('OnlineNewsPopularity.csv')

In [None]:
# Remove leading whitespace from the header names (presumably the CSV headers
# carry leading spaces -- verify against the file).
data.columns = data.columns.str.lstrip()
# 'url' and 'timedelta' are dropped from the frame before any modelling.
data = data.drop(columns=['url', 'timedelta'])

In [None]:
# Box plot of the 'shares' column before any outlier filtering.
sns.boxplot(data=data, x='shares')
plt.show()

In [None]:
# (rows, columns) of the frame before outlier filtering.
data.shape

In [None]:
# Compute the bounds that define an outlier in 'shares': anything further than
# 1.5 * IQR below the first quartile or above the third quartile.
Q1, Q3 = data['shares'].quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Keep only the rows whose 'shares' value lies inside the bounds (inclusive).
data = data[data['shares'].between(lower_bound, upper_bound)]

In [None]:
# Box plot of the 'shares' column as it stands at this point in the notebook.
sns.boxplot(data=data, x='shares')
plt.show()

In [None]:
# Mean of the 'shares' column in the current frame.
data['shares'].mean()

In [None]:
# (rows, columns) of the current frame.
data.shape

In [None]:
# Feature matrix: every column except 'shares'; target vector: 'shares'.
x = data.drop(columns=['shares'])
y = data['shares']

In [None]:
# Hold out 33% of the rows as the test set; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=7,
)

In [None]:
# Standardising scaler; created unfitted here and fitted in a later cell.
scaler = preprocessing.StandardScaler()

In [None]:
# Fit the scaler on the training features only, so no statistics of the test
# set are used during preprocessing.
scaler.fit(x_train)

In [None]:
# Apply the fitted scaler to both splits. Both names are rebound to the scaled
# versions, so this cell should not be re-run without re-running the split.
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [None]:
# Network dimensions: a single output unit and one input per feature column.
output_size = 1
number_of_features = x_train.shape[1]

In [None]:
# Feed-forward regressor: two ReLU hidden layers (64 and 32 units) followed by
# an output layer with no activation.
model = Sequential([
    Input(shape=(number_of_features, )),
    Dense(units=64, activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=output_size),
])

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Train against mean squared error with Adam at a learning rate of 1e-4, and
# track mean absolute error as an additional metric.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss=mean_squared_error,
    metrics=[mean_absolute_error],
)

In [None]:
# Stop training when the validation MAE has not improved by at least 20 for
# 5 consecutive epochs. The validation metric is monitored (rather than the
# training metric) so that stopping reacts to generalisation, not to how well
# the training rows are being fitted; it is available because fit() is called
# with a validation split.
early_stopping = EarlyStopping(
    monitor='val_mean_absolute_error',
    mode='min',
    patience=5,
    min_delta=20,
)

In [None]:
# Train for at most 50 epochs, holding out 20% of the training data for
# validation; the early-stopping callback may end training sooner.
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Loss and metric values of the trained model on the training split.
train_scores = model.evaluate(x=x_train, y=y_train, batch_size=32)
train_scores

In [None]:
# Loss and metric values of the trained model on the held-out test split.
test_scores = model.evaluate(x=x_test, y=y_test, batch_size=32)
test_scores

In [None]:
# Training vs. validation curves per epoch: loss on the left panel, mean
# absolute error on the right panel.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

for ax, (title, key) in zip(axes, [('Loss', 'loss'), ('Mae', 'mean_absolute_error')]):
    ax.set_title(title)
    ax.plot(history.epoch, history.history[key], label='training')
    # Keras stores the validation series under the same key with a 'val_' prefix.
    ax.plot(history.epoch, history.history['val_' + key], label='validation')
    ax.legend(loc='best')

plt.show()