# Merge Data

In [8]:
import pandas as pd

# Read the two input tables; both are expected to carry a 'date' column
# written as YYYY/MM/DD.
MergeX, MergeY = (pd.read_csv(path) for path in ('MergeX.csv', 'MergeY.csv'))

# Apply the same two steps to each table so they share a weekly join key.
for frame in (MergeX, MergeY):
    # Parse the raw date strings into pandas timestamps.
    frame['date'] = pd.to_datetime(frame['date'], format='%Y/%m/%d')
    # dt.dayofweek counts Monday as 0, so subtracting that many days snaps
    # every date back to the Monday of its week.
    days_since_monday = pd.to_timedelta(frame['date'].dt.dayofweek, unit='d')
    frame['week_start_date'] = frame['date'] - days_since_monday

# Outer-join on the weekly key (weeks present in only one table are kept) and
# order the result chronologically. Non-key columns present in both tables,
# such as 'date', receive pandas' default _x/_y suffixes.
merged = pd.merge(
    MergeX, MergeY, on='week_start_date', how='outer'
).sort_values(by='week_start_date')

# Export of the merged data to CSV is currently disabled.
#merged.to_csv('merged.csv', index=False)


# Linear Regression

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Load the cleaned, merged dataset used for the regression.
data = pd.read_csv('cleanmerged.csv')

# Replace missing target values with 0.
# NOTE(review): imputing the *target* with 0 treats "no observation" as a real
# zero and can bias the fit; confirm this is intended rather than dropping
# those rows.
data['Y'] = data['Y'].fillna(0)

# Shape both series as single-column 2-D arrays (n_samples, 1). The estimator
# requires this for X; keeping Y as a column makes coef_ come back 2-D.
X = data['X'].values.reshape(-1,1)
Y = data['Y'].values.reshape(-1,1)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Fit an ordinary least-squares model on the training portion.
regr = LinearRegression()
regr.fit(X_train, Y_train)

# Predict the held-out targets.
Y_pred = regr.predict(X_test)

# Fitted slope(s).
print('Coefficients: \n', regr.coef_)

# Mean squared error on the held-out set. Printed with six significant digits:
# a fixed two-decimal format rounds small errors to "0.00" and hides the value.
print('Mean squared error: %.6g'
      % metrics.mean_squared_error(Y_test, Y_pred))

# Coefficient of determination: 1 is a perfect prediction; a negative value
# means the model does worse than always predicting the mean of Y_test.
print('Coefficient of determination: %.2f'
      % metrics.r2_score(Y_test, Y_pred))

Coefficients: 
 [[0.03117071]]
Mean squared error: 0.00
Coefficient of determination: -0.11


# Daily Lithium Price

In [13]:
# Import the required library
import pandas as pd

# Read the daily futures prices, parse the 'Date' column into timestamps,
# order the rows chronologically and promote 'Date' to the index.
df = (
    pd.read_csv('Lithium_futures_price 1.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
)

# Preview the first few rows.
print(df.head())

# Descriptive statistics of the 'Close' column.
summary = df['Close'].describe()

print("\nStatistical summary:")
print(summary)


             Close
Date              
2017-05-10  136000
2017-05-11  136000
2017-05-12  136000
2017-05-15  136000
2017-05-16  136000

Statistical summary:
count      1496.00000
mean     176106.11631
std      160633.04864
min       39000.00000
25%       65500.00000
50%       97000.00000
75%      192500.00000
max      597500.00000
Name: Close, dtype: float64


# Weekly Lithium Price

In [20]:
# Import the required library
import pandas as pd

# Read the weekly series from disk.
df = pd.read_csv('weeklylog.csv')

# Parse 'date' into timestamps, then sort chronologically and index by it.
df = df.assign(date=pd.to_datetime(df['date']))
df = df.sort_values('date').set_index('date')

# Preview the first few rows.
print(df.head())

# Descriptive statistics of the 'Close' column.
summary = df['Close'].describe()

# Blank line, heading, then the summary table.
print("\nStatistical summary:", summary, sep="\n")


            logreturn     Close
date                           
2017-05-10        NaN  136000.0
2017-05-15   0.003054  141000.0
2017-05-22   0.000000  141000.0
2017-05-30   0.001188  143000.0
2017-06-05   0.000879  144500.0

Statistical summary:
count       310.000000
mean     175441.935484
std      160724.590112
min       39000.000000
25%       65500.000000
50%       97500.000000
75%      190000.000000
max      597500.000000
Name: Close, dtype: float64
