# Arize Demo Notebook 
This notebook gives an overview of how to send the data in the Dummy Model CSV to Arize using the Arize Python SDK.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Arize-ai/client_python/blob/main/arize/examples/tutorials/My_First_Dummy_Model.ipynb)

In [21]:
# Load Dummy Model CSV

import pandas as pd
import requests
import io

url = 'https://storage.googleapis.com/arize-assets/tutorials/fixture_data/mock_model.csv'

# Fail loudly on a bad download: without the status check an HTTP error page
# would be decoded and parsed as if it were the CSV. The timeout keeps the
# cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
response.raise_for_status()
url_content = response.content

# The payload is decoded as UTF-8 and parsed from an in-memory text buffer.
df = pd.read_csv(io.StringIO(url_content.decode('utf-8')), low_memory=False)
df.head()

Unnamed: 0,prediction_id,x,y,actuals,predicted,hex_id,err,time
0,0,847,-200,1859.2,1917.136,a,57.936,1
1,1,157,-67,146.6,202.24,b,55.64,1
2,2,274,0,956.4,1018.304,c,61.904,1
3,3,270,-194,-183.2,-172.344,d,10.856,1
4,4,743,-72,2227.2,2241.296,e,14.096,1


In [4]:
## Install Arize & Initialize Client

!pip install arize -q

from arize.api import Client

API_KEY = 'ARIZE_PROVIDED_API_KEY'
ORGANIZATION_KEY = 'ARIZE_PROVIDED_ORG_KEY'
arize = Client(organization_key=ORGANIZATION_KEY, api_key=API_KEY)

## Important Points 
1. Predictions need a prediction ID. For now, I have just used the row ID in this file. Is there a session UUID that you can send here?
2. Time - The dummy `time` values in the CSV are not sent as-is. They are replaced with historical timestamps computed relative to the current time, and those are sent instead.

In [22]:
# Processing to get Data into Right format for Arize

# Columns are selected with explicit indexing (df[[...]]) instead of
# df.filter(...): filter silently returns an empty frame when a column name is
# missing or misspelled, whereas indexing raises a KeyError right here.

# Prediction IDs (must be string typed) are one dataframe
df['prediction_id'] = df['prediction_id'].map(str)
prediction_ids_df = df[['prediction_id']]

# Predictions are one dataframe
prediction_df = df[['predicted']]

# Actuals are one dataframe
actuals_df = df[['actuals']]

# Features and Additional Metadata are one dataframe: every column except the
# labels, the error column and the time column.
# NOTE(review): this keeps 'Unnamed: 0' and 'prediction_id' as feature
# columns - confirm that is intended.
features_df = df.drop(columns=['actuals', 'predicted', 'err', 'time'])

# Time is one column (this is only necessary because we want to overwrite the timestamp with something reasonable)
import time
current_time = time.time()
def change_time(row_time, now=None, anchor=1000, seconds_per_step=3600):
    """Map a dummy time value from the CSV onto a historical epoch timestamp.

    A ``row_time`` equal to ``anchor`` maps to ``now``; every unit below
    ``anchor`` moves the result ``seconds_per_step`` seconds further into the
    past.

    Args:
        row_time: Dummy time value taken from the ``time`` column.
        now: Reference epoch time in seconds. Defaults to the module-level
            ``current_time`` captured when this cell was run.
        anchor: The ``row_time`` value that corresponds to ``now``.
        seconds_per_step: Seconds subtracted per unit of distance from
            ``anchor``.

    Returns:
        The shifted timestamp, truncated to an ``int``.
    """
    if now is None:
        now = current_time
    return int(now - (anchor - row_time) * seconds_per_step)
# Overwrite the time column only once per loaded frame. Re-running this cell
# would otherwise feed the already-shifted epoch values back through
# change_time and produce meaningless timestamps. The flag lives in df.attrs,
# so it disappears when the CSV is loaded again into a new DataFrame.
if not df.attrs.get('time_overwritten', False):
    df['time'] = df['time'].apply(change_time)
    df.attrs['time_overwritten'] = True
df.head()


Unnamed: 0,prediction_id,x,y,actuals,predicted,hex_id,err,time
0,0,847,-200,1859.2,1917.136,a,57.936,1593780692
1,1,157,-67,146.6,202.24,b,55.64,1593780692
2,2,274,0,956.4,1018.304,c,61.904,1593780692
3,3,270,-194,-183.2,-172.344,d,10.856,1593780692
4,4,743,-72,2227.2,2241.296,e,14.096,1593780692


In [24]:
# Log the 5000 Predictions and Actuals for the Dummy Model

# This example uses the bulk send.
import concurrent.futures as cf

# Predictions and actuals must be logged under the same model id.
MODEL_ID = 'my_first_dummy_model'


def report_responses(futures, done_message):
    """Wait for each bulk-send future and print its HTTP status.

    Args:
        futures: Iterable of futures returned by a bulk log call; each result
            is read for its ``status_code`` and ``text`` attributes.
        done_message: Text printed in front of the status code as each future
            completes.
    """
    for future in cf.as_completed(futures):
        res = future.result()
        print(f'{done_message} {res.status_code}')
        if res.status_code != 200:
            print(f'future failed because: {res.text}')


pred_responses = arize.log_bulk_predictions(
    model_id=MODEL_ID,
    model_version='v2',
    prediction_ids=prediction_ids_df,
    features=features_df,
    prediction_labels=prediction_df,
    time_overwrite=df['time'],
)
report_responses(pred_responses, 'predictions response completed...')

actual_responses = arize.log_bulk_actuals(
    model_id=MODEL_ID,
    prediction_ids=prediction_ids_df,
    actual_labels=actuals_df,
)
# The message text (including its spelling) is kept exactly as before so the
# printed output does not change.
report_responses(actual_responses, 'actuals respons completed...')

print('Done sending data to Arize!')
    

predictions response completed... 200
predictions response completed... 200
predictions response completed... 200
predictions response completed... 200
predictions response completed... 200
actuals respons completed... 200
Done sending data to Arize!
