In [23]:
!pip install giza-datasets
!pip install git+https://github.com/gizatechxyz/datasets
!pip install xgboost
!pip install scikit-learn
!pip install polars


Collecting git+https://github.com/gizatechxyz/datasets
  Cloning https://github.com/gizatechxyz/datasets to c:\users\sakshi panchal\appdata\local\temp\pip-req-build-cvojyozn
  Resolved https://github.com/gizatechxyz/datasets to commit 5dcfb1c8c615d130bb5381798428534633f51337
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/gizatechxyz/datasets 'C:\Users\sakshi panchal\AppData\Local\Temp\pip-req-build-cvojyozn'




In [24]:
import os
import certifi
import polars as pl
import pandas as pd
from giza.datasets import DatasetsHub, DatasetsLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb
import datetime

# Set SSL_CERT_FILE environment variable
os.environ['SSL_CERT_FILE'] = certifi.where()


In [25]:
# Initialize the DatasetsHub and DatasetsLoader
hub = DatasetsHub()
loader = DatasetsLoader()

# List available tags and datasets
print(hub.list_tags())

# Load the specific datasets
datasets = ['tokens-ohcl', 'tokens-daily-prices-mcap-volume', 'top-pools-apy-per-protocol', 'tvl-per-project-tokens/project=lido']
dfs = []
for dataset_name in datasets:
    df = loader.load(dataset_name)
    dfs.append(df.to_pandas())  # Convert to pandas DataFrame for convenience

# Combine datasets into a single DataFrame (Example concatenation, adjust based on your actual schema)
df_combined = pd.concat(dfs, axis=1)
print(df_combined.head())


['Morpho-aavev3', 'Yearn-finance', 'Optimism', 'flamincome', 'coinwind', 'Gnosis', 'Binance-staked-eth', 'Avalanche', 'lido', 'Origin-ether', 'compound-v2', 'Balancer-v1', 'Radiant-v2', 'Deposits', 'sushiswap', 'balancer-v2', 'Polygon', 'DEX', 'Thorchain', 'Borrows & Deposits', 'pancakeswap-amm', 'Multi-chain', 'Morpho-aave', 'Pancakeswap-amm', 'Frax-ether', 'Beefy', 'curve-dex', 'Swap Fees', 'PancakeSwap', 'Fees', 'Yearn-v2', 'Yield', 'beefy', 'Mcap', 'Flamincome', 'Aura', 'pendle', 'Farcaster', 'yearn-finance', 'Pendle', 'rocket-pool', 'aave-v3', 'Arbitrum', 'Sommelier', 'Uniswap-v3', 'Sushiswap', 'Dexes', 'Compound-v3', 'Liquiditations', 'Spark', 'Mantle-staked-eth', 'Lending', 'APY', 'Benqi-lending', 'Aave-v2', 'Curve', 'Penpie', 'TVL', 'Trade Volume', 'Yield Aggregator', 'uniswap-v2', 'Coinwind', 'Lido', 'Ethereum', 'Token Price', 'Balancer-v2', 'DeFi', 'uniswap-v3', 'benqi-lending', 'Pancakeswap-amm-v3', 'Compound-v2', 'Liquid Staking', 'price', 'Convex-finance', 'Aave-v3', 'dail

In [26]:
# Check the column names
print(df_combined.columns)

# Rename duplicate columns to avoid conflicts
df_combined.columns = ['date1', 'Open', 'High', 'Low', 'Close', 'token1', 'date2', 'price',
                       'market_cap', 'volumes_last_24h', 'token2', 'date3', 'tvlUsd', 'apy',
                       'project1', 'underlying_token', 'chain', 'DOT', 'KSM', 'LUNC', 'MATIC',
                       'SOL', 'WETH', 'date4', 'project2']

# Example feature engineering; adjust based on your dataset
df_combined['date1'] = pd.to_datetime(df_combined['date1'])
df_combined['day_of_week'] = df_combined['date1'].dt.dayofweek
df_combined['month'] = df_combined['date1'].dt.month

# Dummy target variable for illustration purposes
df_combined['target'] = (df_combined['price'] > df_combined['price'].median()).astype(int)

# Define features and target
features = df_combined[['day_of_week', 'month', 'price', 'market_cap', 'volumes_last_24h', 'tvlUsd', 'apy']]
target = df_combined['target']


Index(['date', 'Open', 'High', 'Low', 'Close', 'token', 'date', 'price',
       'market_cap', 'volumes_last_24h', 'token', 'date', 'tvlUsd', 'apy',
       'project', 'underlying_token', 'chain', 'DOT', 'KSM', 'LUNC', 'MATIC',
       'SOL', 'WETH', 'date', 'project'],
      dtype='object')


In [27]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)


In [28]:
# Initialize the XGBoost model with additional parameters
model = xgb.XGBClassifier(
    objective='binary:logistic',
    random_state=42,
    eta=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    n_estimators=100
)

# Train the model
model.fit(X_train, y_train)


In [29]:
# Make predictions on the test set
predictions = model.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy}")

# Generate a classification report
report = classification_report(y_test, predictions)
print("Classification Report:")
print(report)


Model Accuracy: 0.9998220046723774
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     37573
           1       1.00      1.00      1.00      7372

    accuracy                           1.00     44945
   macro avg       1.00      1.00      1.00     44945
weighted avg       1.00      1.00      1.00     44945



In [30]:
# Save the model to a file
#model.save_model('model.json')


In [31]:

investment_amount = float(input("Enter the amount to be invested: "))
volatility_list = input("Enter the list of volatilities (comma separated): ").split(',')
duration = int(input("Enter the duration of investment in days: "))
target_return = float(input("Enter your target return percentage: "))
risk_tolerance = input("Enter your risk tolerance (low, medium, high): ")

# Example processing of the volatility list
avg_volatility = sum(map(float, volatility_list)) / len(volatility_list)

# Create a sample input based on user inputs and existing features
sample_input = pd.DataFrame({
    'day_of_week': [datetime.datetime.now().weekday()],
    'month': [datetime.datetime.now().month],
    'price': [df_combined['price'].median()],
    'market_cap': [df_combined['market_cap'].median()],
    'volumes_last_24h': [df_combined['volumes_last_24h'].median()],
    'tvlUsd': [df_combined['tvlUsd'].median()],
    'apy': [df_combined['apy'].median()],
    'amount_to_invest': [investment_amount],
    'duration_of_investment': [duration],
    'volatility': [avg_volatility]
})

# Ensure the sample_input matches the features used in training
sample_input = sample_input[features.columns]

# Make prediction
prediction = model.predict(sample_input)
probabilities = model.predict_proba(sample_input)

# Determine action based on prediction and probabilities
if probabilities[0][1] > 0.7:  # High confidence buy
    action = 'buy'
elif probabilities[0][0] > 0.7:  # High confidence sell
    action = 'sell'
else:  # Low confidence, suggest watching
    action = 'watch'

print(f"The agent suggests to {action} based on the provided inputs.")

Enter the amount to be invested: 20000
Enter the list of volatilities (comma separated): 0.4,0.1
Enter the duration of investment in days: 15
Enter your target return percentage: 10
Enter your risk tolerance (low, medium, high): low
The agent suggests to watch based on the provided inputs.
