# Introducing Arize Python SDK Version 3.1.1

## Send 10 million inferences with 21 features to Arize in 90 seconds!

Arize 3.1.1 significantly improves the SDK-side execution time of data ingestion when you send inferences to the Arize platform with `arize.pandas.logger`.

By running this Colab Notebook, you will:
1. Set up the Arize client with your API and organization keys
2. Download a prepopulated dataset of 10 million inferences with 21 features (integer, float, str), prediction label, actual label, prediction score, and actual score
3. Append timestamps to the inferences so that they span 30 days
4. Send the 10 million inferences to Arize

In [None]:
# Install the Arize SDK into the notebook runtime; -q keeps pip's output quiet.
!pip -q install arize

import datetime
import time
import uuid

import numpy as np
import pandas as pd
from arize.pandas.logger import Client, Schema
from arize.utils.types import Environments, ModelTypes

import arize

# Show which SDK release is installed in this runtime.
sdk_version = arize.__version__
print(f"You are using arize version: {sdk_version}")

### Set up Arize Client with your API and Organization Keys

In [None]:
# Credentials for the Arize platform -- replace both placeholders with your own keys.
API_KEY = "YOUR_API_KEY"
ORGANIZATION_KEY = "YOUR_ORG_KEY"

# The client built here is the one used later to upload the DataFrame.
arize_client = Client(
    api_key=API_KEY,
    organization_key=ORGANIZATION_KEY,
)

### Download the 10 million inferences and load them into a pandas DataFrame

In [None]:
# Download the benchmark dataset file (10m_inferences.ftr) that is loaded with pandas afterwards.
!wget https://storage.googleapis.com/arize-assets/fixtures/SDK_Benchmark_Data/10m_inferences.ftr

In [None]:
# Load the downloaded Feather file into memory as a DataFrame.
dataset_path = "10m_inferences.ftr"
df = pd.read_feather(dataset_path)

In [None]:
# Attach a timestamp to every inference so the data points are spread evenly
# over the last 30 days in the Arize app.
# The clock is read once so both ends of the window derive from the same instant.
now = datetime.datetime.now()
current_time = now.timestamp()
earlier_time = (now - datetime.timedelta(days=30)).timestamp()
# One evenly spaced epoch-second value per row. int64 is requested explicitly
# because plain `int` can map to a 32-bit type on some platforms.
prediction_ts = np.linspace(earlier_time, current_time, num=len(df)).astype(np.int64)
df["prediction_ts"] = prediction_ts

In [None]:
# Display the DataFrame dimensions as (rows, columns).
df.shape

In [None]:
# Preview the first rows of the DataFrame, including its column names.
df.head()

### Send Data to Arize

In [None]:
# Columns that have a dedicated role in the schema; every remaining column of
# the DataFrame is passed as a feature.
non_feature_columns = [
    "prediction_id",
    "prediction_ts",
    "prediction_label",
    "prediction_score",
    "actual_label",
    "actual_score",
]
feature_columns = df.columns.drop(non_feature_columns)

# Map DataFrame column names onto the fields of the Schema.
schema = Schema(
    prediction_id_column_name="prediction_id",
    timestamp_column_name="prediction_ts",
    feature_column_names=feature_columns,
    prediction_label_column_name="prediction_label",
    prediction_score_column_name="prediction_score",
    actual_label_column_name="actual_label",
    actual_score_column_name="actual_score",
)


# Measure the client-side time needed to serialize and send the DataFrame.
start = time.time_ns()
res = arize_client.log(
    dataframe=df,
    path="inferences.bin",
    # The epoch-second suffix gives every run its own model id. int(time.time())
    # is used instead of strftime('%s') because '%s' is a C-library extension
    # that is not supported on every platform.
    model_id=f"10-million-inferences-test-{int(time.time())}",
    model_version="v1.0",
    model_type=ModelTypes.SCORE_CATEGORICAL,
    environment=Environments.PRODUCTION,
    schema=schema,
)
# Stop the clock as soon as log() returns so the status print below is not timed.
end = time.time_ns()
print(f"future completed with response code {res.status_code}")

if res.status_code != 200:
    print(f"future failed with response code {res.status_code}, {res.text}")
else:
    # time_ns() yields nanoseconds; convert to milliseconds for the report.
    elapsed_ms = (end - start) / 1_000_000
    print(
        f"request took a total of {elapsed_ms:.2f}ms to serialize and send {df.shape[0]} records with {df.shape[1]} columns.\n"
    )