# Try out gradio

**Load and try the model**

In [None]:
# Prerequisites
from tabulate import tabulate
from transformers import pipeline
import json
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import os

# Load the model and create a pipeline for zero-shot classification (1min loading + classifying with 89 labels)
classifier = pipeline("zero-shot-classification", model="sileod/deberta-v3-base-tasksource-nli")
# tried:
# facebook/bart-large-mnli
# 

# get candidate labels
with open("packing_label_structure.json", "r") as file:
    candidate_labels = json.load(file)
keys_list = list(candidate_labels.keys())

# Load test data (in list of dictionaries)
with open("test_data.json", "r") as file:
    packing_data = json.load(file)
# Extract all trip descriptions and trip_types
trip_descriptions = [trip['description'] for trip in packing_data]
trip_types = [trip['trip_types'] for trip in packing_data]

# Access the first trip description
first_trip = trip_descriptions[1]
# Get the packing list for the secondfirst trip
first_trip_type = trip_types[1]

print(f"First trip: {first_trip} \n")
print(f"Trip type: {first_trip_type}")

config.json:   0%|          | 0.00/18.5k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/738M [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/repos/02/27/022742fbdf4a76000fe82e4c637df8c93e9d656674860958652c57c433c438c6/d58b81ba665995176104a66f7b7d0b9e2e412564f6b33917e84c4eb920946841?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1749125341&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0OTEyNTM0MX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy8wMi8yNy8wMjI3NDJmYmRmNGE3NjAwMGZlODJlNGM2MzdkZjhjOTNlOWQ2NTY2NzQ4NjA5NTg2NTJjNTdjNDMzYzQzOGM2L2Q1OGI4MWJhNjY1OTk1MTc2MTA0YTY2ZjdiN2QwYjllMmU0MTI1NjRmNmIzMzkxN2U4NGM0ZWI5MjA5NDY4NDE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=FZegaxqqXa9go3fA9ds9Yb2s-IsYUgtHTnuw1lPpjyIebHawo25TyRPZh1guw30BoPIJUcWlPr8AIxPOK%7EwfLfOccApADwd95kKQRaOxd0QuMY893eflWdmO%7EjwkPNQbn2UpYRIHCcRaKwptMp13LOCPtGHUwYNuM-KVVtK3chpyVCE7pjhhemEKw5bui4LOTOI%7Evird56Dz2PVTao3W5gatzukB5XMi147lEeEU2%7EjPGy9DA62VhG4dN1hU2jnVosY

model.safetensors:  30%|##9       | 220M/738M [00:00<?, ?B/s]

In [None]:
# function that returns pandas data frame with predictions

cut_off = 0.5  # used to choose which activities are relevant

def pred_trip(trip_descr, trip_type, cut_off):
    # Create an empty DataFrame with specified columns
    df = pd.DataFrame(columns=['superclass', 'pred_class'])
    for i, key in enumerate(keys_list):
        if key == 'activities':
            result = classifier(trip_descr, candidate_labels[key], multi_label=True)
            indices = [i for i, score in enumerate(result['scores']) if score > cut_off]
            classes = [result['labels'][i] for i in indices]
        else:
            result = classifier(trip_descr, candidate_labels[key])
            classes = result["labels"][0]
        print(result)
        print(classes)
        print(i)
        df.loc[i] = [key, classes]
    df['true_class'] = trip_type
    return df

In [None]:
# function for accuracy, perc true classes identified and perc wrong pred classes

def perf_measure(df):
    df['same_value'] = df['pred_class'] == df['true_class']
    correct = sum(df.loc[df.index != 1, 'same_value'])
    total = len(df['same_value'])
    accuracy = correct/total
    pred_class = df.loc[df.index == 1, 'pred_class'].iloc[0]
    true_class = df.loc[df.index == 1, 'true_class'].iloc[0]
    correct = [label for label in pred_class if label in true_class]
    num_correct = len(correct)
    correct_perc = num_correct/len(true_class)
    num_pred = len(pred_class)
    wrong_perc = (num_pred - num_correct)/num_pred
    df_perf = pd.DataFrame({
    'accuracy': [accuracy],
    'true_ident': [correct_perc],
    'false_pred': [wrong_perc]
    })
    return(df_perf)

**Now do the same for all trips**

In [None]:
result_list = []
performance = pd.DataFrame(columns=['accuracy', 'true_ident', 'false_pred'])
 
for i in range(len(trip_descriptions)):
    current_trip = trip_descriptions[i]
    current_type = trip_types[i]
    df = pred_trip(current_trip, current_type, cut_off = 0.5)
    print(df)
    
    # accuracy, perc true classes identified and perc wrong pred classes
    performance = pd.concat([performance, perf_measure(df)])
    print(performance)
    
    result_list.append(df)

**Compute average performance measures**

In [None]:
# Extract "same_value" column from each DataFrame
sv_columns = [df['same_value'] for df in result_list]  # 'same' needs to be changed
sv_columns.insert(0, result_list[0]['superclass'])

# Combine into a new DataFrame (columns side-by-side)
sv_df = pd.concat(sv_columns, axis=1)

print(sv_df)

In [None]:
# Compute accuracy per superclass (row means of same_value matrix excluding the first column)
row_means = sv_df.iloc[:, 1:].mean(axis=1)

df_row_means = pd.DataFrame({
    'superclass': sv_df['superclass'],
    'accuracy': row_means
})

print(df_row_means)

In [None]:
# Compute performance measures per trip (mean for each column of performance table)
column_means = performance.mean()
print(column_means)

# Plot histograms for all numeric columns
performance.hist(bins=10, figsize=(10, 6))
plt.tight_layout()
plt.show()

In [None]:
# save results
# Example data for one model
model_name = 'model_b_sileod-deberta-v3-base-tasksource-nli'
# Structure to save
model_result = {
    'model': model_name,
    'predictions': result_list,
    'performance': performance,
    'perf_summary': column_means,
    'perf_superclass': df_row_means
}

# File path with folder
filename = os.path.join('results', f'{model_name}_results.pkl')

# Save the object
with open(filename, 'wb') as f:
    pickle.dump(model_result, f)

In [None]:
# load all model results later for comparison
with open(f'{model_name}_results.pkl', 'rb') as f:
    model_result = pickle.load(f)


**Use gradio for user input**

In [66]:
# use model with gradio
from transformers import pipeline
import gradio as gr

# make a function for what I am doing
def classify(text):
    df = pd.DataFrame(columns=['Superclass', 'class'])
    for i, key in enumerate(keys_list):
        # Run the classification (ca 30 seconds classifying)
        if key == 'activities':
            result = classifier(text, candidate_labels[key], multi_label=True)
            classes = [result['labels'][i] for i in indices]
        else:
            result = classifier(text, candidate_labels[key])
            classes = result["labels"][0]
        print(i)
        df.loc[i] = [key, classes]

    return df

demo = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="dataframe",
    title="Zero-Shot Classification",
    description="Enter a text describing your trip",
)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://aa06d5d85ffadaa92b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


0
1
2
3
4
5
6
7
8
0
1
2
3
4
5
6
7
8


Use model with gradio

In [4]:
# Define the Gradio interface
def classify(text):
    return classifier(text, class_labels)

demo = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="json",
    title="Zero-Shot Classification",
    description="Enter a text describing your trip",
)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://0f70ba5369d721cf8f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
