# Fetch and Store Dataset

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# Function to get gas price data for the last `days` days (180 by default)
def get_gas_price_data(days=180):
    """Fetch ETH gas price history from the Owlracle API as a DataFrame.

    Args:
        days: Length of the look-back window, in days, ending at the
            current time. Defaults to 180, the window the function
            previously had hard-coded.

    Returns:
        A DataFrame built from the JSON response, with 'timestamp' parsed
        into timezone-aware UTC datetimes and the nested 'gasPrice'
        dictionaries expanded into separate columns (the original
        'gasPrice' column is dropped).

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    # Read the clock once so both ends of the window derive from the same instant.
    now = datetime.now()
    end_timestamp = int(now.timestamp())
    start_timestamp = int((now - timedelta(days=days)).timestamp())

    # NOTE(review): the request asks for timeframe=60 and candles=1000;
    # presumably that is 60-minute candles capped at 1000 rows, which would
    # cover far less than a 180-day window -- confirm against the Owlracle docs.
    url = f'https://api.owlracle.info/v3/eth/history?to={end_timestamp}&from={start_timestamp}&timeframe=60&candles=1000'

    # Without a timeout, requests can block forever on an unresponsive server.
    res = requests.get(url, timeout=30)
    if res.status_code != 200:
        raise Exception(f"Error fetching gas price data: {res.status_code}")

    df_gas = pd.DataFrame(res.json())

    # Parse timestamps; if the parsed values carry no timezone, treat them as UTC
    # so they can be merged with the (UTC) price data.
    df_gas['timestamp'] = pd.to_datetime(df_gas['timestamp'])
    if df_gas['timestamp'].dt.tz is None:
        df_gas['timestamp'] = df_gas['timestamp'].dt.tz_localize('UTC')

    # Flatten the per-row 'gasPrice' dictionaries into their own columns and
    # replace the nested column with them.
    gas_price_df = pd.json_normalize(df_gas['gasPrice'])
    df_gas = df_gas.join(gas_price_df).drop(columns=['gasPrice'])

    return df_gas

# Function to get historical price data for BTC and ETH
def get_crypto_price_data(crypto_id, vs_currency='usd', days=180):
    """Fetch daily historical prices for one coin from the CoinGecko API.

    Args:
        crypto_id: CoinGecko coin identifier used in the URL path
            (for example 'bitcoin' or 'ethereum').
        vs_currency: Quote currency passed to the API. Defaults to 'usd'.
        days: Number of days of history to request. Defaults to 180.

    Returns:
        A DataFrame with two columns: 'timestamp' (timezone-aware UTC
        datetimes) and 'price'.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    url = f'https://api.coingecko.com/api/v3/coins/{crypto_id}/market_chart'
    params = {
        'vs_currency': vs_currency,
        'days': days,
        'interval': 'daily',
    }

    # Without a timeout, requests can block forever on an unresponsive server.
    res = requests.get(url, params=params, timeout=30)
    if res.status_code != 200:
        raise Exception(f"Error fetching {crypto_id} price data: {res.status_code}")
    data = res.json()

    # 'prices' is read as a list of [timestamp, price] pairs.
    df_prices = pd.DataFrame(data['prices'], columns=['timestamp', 'price'])

    # The timestamps are epoch milliseconds; utc=True yields UTC-aware values
    # directly, replacing the separate "localize if naive" step.
    df_prices['timestamp'] = pd.to_datetime(df_prices['timestamp'], unit='ms', utc=True)
    return df_prices

# Download the raw inputs: gas price history plus BTC and ETH price histories
df_gas = get_gas_price_data()
df_btc = get_crypto_price_data('bitcoin')
df_eth = get_crypto_price_data('ethereum')

# Prepare each frame for the as-of merges: order by the join key and give the
# two price series distinct column names
gas_sorted = df_gas.sort_values('timestamp')
btc_sorted = df_btc.rename(columns={'price': 'btc_price'}).sort_values('timestamp')
eth_sorted = df_eth.rename(columns={'price': 'eth_price'}).sort_values('timestamp')

# For every gas row, attach the latest BTC price at or before its timestamp,
# then do the same for ETH
df_combined = pd.merge_asof(gas_sorted, btc_sorted, on='timestamp', direction='backward')
df_combined = pd.merge_asof(df_combined, eth_sorted, on='timestamp', direction='backward')

# Write the merged table to one CSV file (no index column).
# NOTE(review): the file name says "90 days" although the fetch functions
# default to a 180-day window; the name is kept because other cells read it.
df_combined.to_csv('combined_gas_btc_eth_prices_last_90_days.csv', index=False)

# Report that the export finished
print("Combined CSV file has been created: combined_gas_btc_eth_prices_last_90_days.csv")


Combined CSV file has been created: combined_gas_btc_eth_prices_last_90_days.csv


# Model Creation

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the saved CSV file
df_combined = pd.read_csv('combined_gas_btc_eth_prices_last_90_days.csv')

# Convert timestamp to datetime and extract time-based features
df_combined['timestamp'] = pd.to_datetime(df_combined['timestamp'])
df_combined['hour'] = df_combined['timestamp'].dt.hour
df_combined['day_of_week'] = df_combined['timestamp'].dt.dayofweek

# Feature engineering: candle body (close - open) and candle range (high - low)
df_combined['price_diff'] = df_combined['close'] - df_combined['open']
df_combined['high_low_diff'] = df_combined['high'] - df_combined['low']

# Select features and target variable
feature_columns = [
    'samples', 'open', 'close', 'low', 'high',
    'btc_price', 'eth_price',
    'hour', 'day_of_week',
    'price_diff', 'high_low_diff',
]
target_column = 'avgGas'

# The CSV comes from backward as-of merges, which leave the price columns
# empty for any gas row older than the first available price. Drop incomplete
# rows so the model is never fit on missing values.
df_combined = df_combined.dropna(subset=feature_columns + [target_column])

X = df_combined[feature_columns]
y = df_combined[target_column]

# Split BEFORE scaling so the scaler never sees the test rows.
# NOTE(review): this is a shuffled split on time-ordered data; a chronological
# split may give a more honest estimate -- confirm the intended evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows; fitting on all rows would leak test-set statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that code referring to X_scaled keeps working; it is the full
# feature matrix scaled with the training-set statistics.
X_scaled = scaler.transform(X)

# Initialize and train a Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Print the predictions for inspection (rows keep the index of y_test)
predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("\nPredictions:")
print(predictions_df.head())


Mean Squared Error: 71663358.42033212
R-squared: 0.5256091290179327

Predictions:
            Actual      Predicted
521   93108.363555   94976.268748
737   96801.218848   96007.975860
740   98165.916783   96203.441585
660   95608.546419   92903.139228
411  115883.360675  112416.667929
