<a href="https://colab.research.google.com/github/Challakrupajyothi/Stock_prediction/blob/main/StockPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Collecting Data from API**

In [33]:
import requests
import pandas as pd
from datetime import datetime

# Replace 'YOUR_API_KEY' with your actual CryptoCompare API key
API_KEY = '103611374f4b59bc246db96c334a316a2e5cb0890ee2f56437a14532e28fe56f'
BASE_URL = 'https://min-api.cryptocompare.com/data/v2/histoday'

def fetch_crypto_data(crypto_pair, start_date):
    """
    Fetch daily historical data for a specific cryptocurrency pair.

    Parameters:
    - crypto_pair (str): Cryptocurrency pair, e.g., "BTC/USD".
    - start_date (str): Start date in "YYYY-MM-DD" format.

    Returns:
    - DataFrame: Data with columns Date, Open, High, Low, Close.
    """
    fsym, tsym = crypto_pair.split('/')  # Extract base and quote currencies
    limit = 2000  # Maximum daily records per API call (adjustable)

    # Request historical data
    params = {
        'fsym': fsym,
        'tsym': tsym,
        'limit': limit,
        'toTs': int(datetime.strptime(start_date, "%Y-%m-%d").timestamp()),
        'api_key': API_KEY
    }
    response = requests.get(BASE_URL, params=params)
    data = response.json()
    # print(data)

    # Parse and format data
    if 'Data' in data['Data']:
        ohlc_data = data['Data']['Data']
        df = pd.DataFrame(ohlc_data)
        df['Date'] = pd.to_datetime(df['time'], unit='s')
        df = df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'})
        return df[['Date', 'Open', 'High', 'Low', 'Close']]
    else:
        print("Error fetching data:", data)
        return None

# Example usage
crypto_pair = "BTC/USD"
start_date = "2024-01-01"
df = fetch_crypto_data(crypto_pair, start_date)
print(df.head())

        Date     Open     High      Low    Close
0 2018-07-11  6306.87  6405.59  6293.68  6394.36
1 2018-07-12  6394.36  6394.93  6084.00  6253.60
2 2018-07-13  6253.66  6349.21  6131.54  6229.83
3 2018-07-14  6229.61  6332.46  6190.18  6268.75
4 2018-07-15  6268.32  6401.50  6245.75  6364.26


# Genarating the new columns form the existing columns as per my requirement

In [34]:
import pandas as pd

def calculate_crypto_metrics(df, variable1, variable2):
    # Historical High Price for look-back period
    df[f'High_Last_{variable1}_Days'] = df['High'].rolling(window=variable1, min_periods=1).max()

    # Days Since High for look-back period
    df[f'Days_Since_High_Last_{variable1}_Days'] = (
        df.groupby((df['High'] == df[f'High_Last_{variable1}_Days']).cumsum()).cumcount()
    )

    # % Difference from Historical High
    df[f'%_Diff_From_High_Last_{variable1}_Days'] = (
        (df['Close'] - df[f'High_Last_{variable1}_Days']) / df[f'High_Last_{variable1}_Days'] * 100
    )

    # Historical Low Price for look-back period
    df[f'Low_Last_{variable1}_Days'] = df['Low'].rolling(window=variable1, min_periods=1).min()

    # Days Since Low for look-back period
    df[f'Days_Since_Low_Last_{variable1}_Days'] = (
        df.groupby((df['Low'] == df[f'Low_Last_{variable1}_Days']).cumsum()).cumcount()
    )

    # % Difference from Historical Low
    df[f'%_Diff_From_Low_Last_{variable1}_Days'] = (
        (df['Close'] - df[f'Low_Last_{variable1}_Days']) / df[f'Low_Last_{variable1}_Days'] * 100
    )

    # Future High Price for look-forward period
    df[f'High_Next_{variable2}_Days'] = df['High'].shift(-variable2).rolling(window=variable2, min_periods=1).max()

    # % Difference from Future High
    df[f'%_Diff_From_High_Next_{variable2}_Days'] = (
        (df['Close'] - df[f'High_Next_{variable2}_Days']) / df[f'High_Next_{variable2}_Days'] * 100
    )

    # Future Low Price for look-forward period
    df[f'Low_Next_{variable2}_Days'] = df['Low'].shift(-variable2).rolling(window=variable2, min_periods=1).min()

    # % Difference from Future Low
    df[f'%_Diff_From_Low_Next_{variable2}_Days'] = (
        (df['Close'] - df[f'Low_Next_{variable2}_Days']) / df[f'Low_Next_{variable2}_Days'] * 100
    )

    return df

# Sample usage with a sample DataFrame

result_df = calculate_crypto_metrics(df, variable1=7, variable2=5)

# Filling Null Values

In [35]:
df.isnull().sum()
df=df.fillna(0)

# Training the model

In [36]:
# ml_model.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Train the model function
def train_model(data,var1,var2):
    """
    Trains a machine learning model to predict:
    - %_Diff_From_High_Next_{variable2}_Days
    - %_Diff_From_Low_Next_{variable2}_Days

    Parameters:
    data (DataFrame): Data containing input features and target columns.

    Returns:
    model (LinearRegression): Trained machine learning model.
    accuracy (float): The model's accuracy (based on mean squared error).
    """

    # Define features and target variables
    # data=data.fillna(0)
    X = data[[f'Days_Since_High_Last_{var1}_Days',
              f'%_Diff_From_High_Last_{var1}_Days',
              f'Days_Since_Low_Last_{var1}_Days',
              f'%_Diff_From_Low_Last_{var1}_Days']]
    # data.fillna("Nan",0)
    y = data[[f'%_Diff_From_High_Next_{var2}_Days', f'%_Diff_From_Low_Next_{var2}_Days']]


    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and evaluate the model's accuracy
    y_pred = model.predict(X_test)
    accuracy = mean_squared_error(y_test, y_pred)

    print(f"Model MSE: {accuracy}")
    return model, accuracy
train_model(df,7,5)

Model MSE: 43.595407434758926


(LinearRegression(), 43.595407434758926)

# Prediction for give Data

In [37]:
# Prediction function
def predict_outcomes(data,features,var1,var2):
    """
    Predicts %_Diff_From_High_Next_{variable2}_Days and %_Diff_From_Low_Next_{variable2}_Days.

    Parameters:
    model (LinearRegression): Trained model.
    features (list or DataFrame): Input features for prediction.

    Returns:
    predictions (ndarray): Predicted values for both targets.
    """
    data=data.fillna(0)
    X = data[[f'Days_Since_High_Last_{var1}_Days',
              f'%_Diff_From_High_Last_{var1}_Days',
              f'Days_Since_Low_Last_{var1}_Days',
              f'%_Diff_From_Low_Last_{var1}_Days']]
    # data.fillna("Nan",0)
    y = data[[f'%_Diff_From_High_Next_{var2}_Days', f'%_Diff_From_Low_Next_{var2}_Days']]


    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model
    model = LinearRegression()
    model.fit(X_train, y_train)
    feature_name=[f'Days_Since_High_Last_{var1}_Days',
              f'%_Diff_From_High_Last_{var1}_Days',
              f'Days_Since_Low_Last_{var1}_Days',
              f'%_Diff_From_Low_Last_{var1}_Days']
    f_df=pd.DataFrame([features],columns=feature_name)

    # Predict and evaluate the model's accuracy
    predictions = model.predict(f_df)
    return predictions
predict_outcomes(df,[0,-1.556,0,2.6565],7,5)

array([[-3.67209871,  3.72060791]])