In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix


In [2]:
# Load the data from the CSV file
file_path = 'C:/Users/Eshwari/Downloads/Fastmarkets_2024_04_09-110937_nickel.xlsx'  # Replace with the actual file path
data = pd.read_excel(file_path)

# Display the first few rows of the data
data.head()

Unnamed: 0,Date,Price in USD/t,Year,Commodity,Country,Source
0,08/04/2024,17687.5,2024,Nickel,World,Fastmarkets
1,05/04/2024,17462.5,2024,Nickel,World,Fastmarkets
2,04/04/2024,17227.5,2024,Nickel,World,Fastmarkets
3,03/04/2024,16865.0,2024,Nickel,World,Fastmarkets
4,02/04/2024,16975.0,2024,Nickel,World,Fastmarkets


In [3]:
print(data.describe)
print(data.info())

<bound method NDFrame.describe of             Date  Price in USD/t  Year Commodity Country       Source
0     08/04/2024         17687.5  2024    Nickel   World  Fastmarkets
1     05/04/2024         17462.5  2024    Nickel   World  Fastmarkets
2     04/04/2024         17227.5  2024    Nickel   World  Fastmarkets
3     03/04/2024         16865.0  2024    Nickel   World  Fastmarkets
4     02/04/2024         16975.0  2024    Nickel   World  Fastmarkets
...          ...             ...   ...       ...     ...          ...
7115  08/01/1997          7062.5  1997    Nickel   World  Fastmarkets
7116  07/01/1997          6887.5  1997    Nickel   World  Fastmarkets
7117  06/01/1997          6752.5  1997    Nickel   World  Fastmarkets
7118  03/01/1997          6452.5  1997    Nickel   World  Fastmarkets
7119  02/01/1997          6361.0  1997    Nickel   World  Fastmarkets

[7120 rows x 6 columns]>
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7120 entries, 0 to 7119
Data columns (total 6 colu

In [4]:
# sort the dataframe in ascending order by 'Date'
df= data.sort_values(by='Date')

In [5]:
df.set_index('Date', inplace=True)


In [6]:
# Calculate the moving average
df['Moving_Avg'] = df['Price in USD/t'].rolling(window=5).mean()


In [7]:
df.head()

Unnamed: 0_level_0,Price in USD/t,Year,Commodity,Country,Source,Moving_Avg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
01/01/1998,5987.5,1998,Nickel,World,Fastmarkets,
01/01/1999,4097.5,1999,Nickel,World,Fastmarkets,
01/01/2001,7187.5,2001,Nickel,World,Fastmarkets,
01/01/2002,5675.0,2002,Nickel,World,Fastmarkets,
01/01/2003,7095.0,2003,Nickel,World,Fastmarkets,6008.5


In [8]:

df.dropna(inplace=True)

In [9]:
# Assume 'target' is the name of the column containing the target variable
X = df[['Moving_Avg']]  # features
y = df['Price in USD/t']  # target variable



In [10]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
from sklearn.svm import SVR


# Initialize SVR (Support Vector Regressor)
#A larger value of gamma indicates that nearby support vectors have a stronger influence
#A larger value of C penalizes training errors more heavily
#Different kernels can capture different relationships between data points
svm_model = SVR(kernel='rbf', C=1000, gamma=0.1)  

# Reshape X_train and X_test to be 2D for SVR
X_train = X_train.values.reshape(-1, 1)
X_test = X_test.values.reshape(-1, 1)

# model training
svm_model.fit(X_train, y_train)


In [12]:
# Predict on the test set
y_pred = svm_model.predict(X_test)

# Calculate the accuracy or any other metric you're interested in
accuracy = svm_model.score(X_test, y_test)
print(f'Model Accuracy: {accuracy}')


Model Accuracy: 0.02194380424102016


In [13]:
df.reset_index(inplace=True)

In [14]:
# Extract the last date in the dataset
last_date = pd.to_datetime(df['Date'].iloc[-1])

# Generate dates for the next 10 days
next_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=20, freq='D')


  last_date = pd.to_datetime(df['Date'].iloc[-1])


In [15]:
last_date

Timestamp('2021-12-31 00:00:00')

In [16]:
next_dates

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
               '2022-01-09', '2022-01-10', '2022-01-11', '2022-01-12',
               '2022-01-13', '2022-01-14', '2022-01-15', '2022-01-16',
               '2022-01-17', '2022-01-18', '2022-01-19', '2022-01-20'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Convert dates to numerical format (ordinal)
next_dates_numeric = np.array(next_dates.to_numpy().reshape(-1, 1))


In [18]:
next_dates_numeric

array([['2022-01-01T00:00:00.000000000'],
       ['2022-01-02T00:00:00.000000000'],
       ['2022-01-03T00:00:00.000000000'],
       ['2022-01-04T00:00:00.000000000'],
       ['2022-01-05T00:00:00.000000000'],
       ['2022-01-06T00:00:00.000000000'],
       ['2022-01-07T00:00:00.000000000'],
       ['2022-01-08T00:00:00.000000000'],
       ['2022-01-09T00:00:00.000000000'],
       ['2022-01-10T00:00:00.000000000'],
       ['2022-01-11T00:00:00.000000000'],
       ['2022-01-12T00:00:00.000000000'],
       ['2022-01-13T00:00:00.000000000'],
       ['2022-01-14T00:00:00.000000000'],
       ['2022-01-15T00:00:00.000000000'],
       ['2022-01-16T00:00:00.000000000'],
       ['2022-01-17T00:00:00.000000000'],
       ['2022-01-18T00:00:00.000000000'],
       ['2022-01-19T00:00:00.000000000'],
       ['2022-01-20T00:00:00.000000000']], dtype='datetime64[ns]')

In [19]:
# Predict prices for the next 10 days
predicted_prices = svm_model.predict(next_dates_numeric)


In [20]:
predicted_prices

array([14137.48652794, 14137.48652794, 14137.48652794, 14137.48652794,
       14137.48652794, 14137.48652794, 14137.48652794, 14137.48652794,
       14137.48652794, 14137.48652794, 14137.48652794, 14137.48652794,
       14137.48652794, 14137.48652794, 14137.48652794, 14137.48652794,
       14137.48652794, 14137.48652794, 14137.48652794, 14137.48652794])

In [21]:
# Create a DataFrame with predicted prices and dates
predicted_data = pd.DataFrame({
    'Date': next_dates,
    'Predicted Price': predicted_prices
})


In [22]:
predicted_data.head()

Unnamed: 0,Date,Predicted Price
0,2022-01-01,14137.486528
1,2022-01-02,14137.486528
2,2022-01-03,14137.486528
3,2022-01-04,14137.486528
4,2022-01-05,14137.486528


In [23]:
# Combine historical data with forecasted data
historical_data = df[['Date', 'Price in USD/t']].copy()
forecasted_data = predicted_data[['Date', 'Predicted Price']].copy()

combined_data = pd.concat([historical_data, forecasted_data])


In [24]:
combined_data.tail()

Unnamed: 0,Date,Price in USD/t,Predicted Price
15,2022-01-16 00:00:00,,14137.486528
16,2022-01-17 00:00:00,,14137.486528
17,2022-01-18 00:00:00,,14137.486528
18,2022-01-19 00:00:00,,14137.486528
19,2022-01-20 00:00:00,,14137.486528


In [25]:
import plotly.graph_objects as go


# Plot using Plotly
fig = go.Figure()

# Add historical data trace
fig.add_trace(go.Scatter(
    x=historical_data['Date'],
    y=historical_data['Price in USD/t'],
    mode='lines',
    name='Historical Prices'
))

# Add forecasted data trace
fig.add_trace(go.Scatter(
    x=forecasted_data['Date'],
    y=forecasted_data['Predicted Price'],
    mode='lines',
    name='Forecasted Prices',
    line=dict(color='red', width=4)
))

# Update layout
fig.update_layout(
    title='Nickel Prices: Historical and Forecasted',
    xaxis_title='Date',
    yaxis_title='Price in USD/t'
)

# Show plot
fig.show()
