# Implement and test prediciton model

## Prerequisites

In [10]:
# Prerequisites
from tabulate import tabulate
from transformers import pipeline
import json
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import os
import time
import math

# get candidate labels
with open("packing_label_structure.json", "r") as file:
    candidate_labels = json.load(file)
keys_list = list(candidate_labels.keys())

# Load test data (in list of dictionaries)
with open("test_data.json", "r") as file:
    packing_data = json.load(file)
# Extract all trip descriptions and trip_types
trip_descriptions = [trip['description'] for trip in packing_data]
trip_types = [trip['trip_types'] for trip in packing_data]

# Access the first trip description
first_trip = trip_descriptions[0]
# Get the packing list for the secondfirst trip
first_trip_type = trip_types[0]

# print(f"First trip: {first_trip} \n")
# print(f"Trip type: {first_trip_type}")

**All trip descriptions**

In [11]:
for i, item in enumerate(trip_descriptions):
    print(i, ".", item, "\n")
    for elem in trip_types[i]:
        print(elem)
    print("\n")

0 . I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands. 

beach vacation
['swimming', 'going to the beach', 'relaxing', 'hiking']
warm destination / summer
lightweight (but comfortable)
casual
indoor
no own vehicle
no special conditions to consider
7+ days


1 . We are a couple in our thirties traveling to Vienna for a three-day city trip. We’ll be staying at a friend’s house and plan to explore the city by sightseeing, strolling through the streets, visiting markets, and trying out great restaurants and cafés. We also hope to attend a classical music concert. Our journey to Vienna will be by train. 

city trip
['sightseeing']
variable weath

**Functions**

In [12]:
# function that returns pandas data frame with predictions

cut_off = 0.5  # used to choose which activities are relevant

def pred_trip(model_name, trip_descr, trip_type, cut_off):
    classifier = pipeline("zero-shot-classification", model=model_name)
    # Create an empty DataFrame with specified columns
    df = pd.DataFrame(columns=['superclass', 'pred_class'])
    for i, key in enumerate(keys_list):
        if key == 'activities':
            result = classifier(trip_descr, candidate_labels[key], multi_label=True)
            indices = [i for i, score in enumerate(result['scores']) if score > cut_off]
            classes = [result['labels'][i] for i in indices]
        else:
            result = classifier(trip_descr, candidate_labels[key])
            classes = result["labels"][0]
        print(result)
        print(classes)
        print(i)
        df.loc[i] = [key, classes]
    df['true_class'] = trip_type
    return df

In [13]:
# function for accuracy, perc true classes identified and perc wrong pred classes

def perf_measure(df):
    df['same_value'] = df['pred_class'] == df['true_class']
    correct = sum(df.loc[df.index != 1, 'same_value'])
    total = len(df['same_value'])
    accuracy = correct/total
    pred_class = df.loc[df.index == 1, 'pred_class'].iloc[0]
    true_class = df.loc[df.index == 1, 'true_class'].iloc[0]
    correct = [label for label in pred_class if label in true_class]
    num_correct = len(correct)
    correct_perc = num_correct/len(true_class)
    num_pred = len(pred_class)
    if num_pred == 0:
        wrong_perc = math.nan
    else:
        wrong_perc = (num_pred - num_correct)/num_pred
    df_perf = pd.DataFrame({
    'accuracy': [accuracy],
    'true_ident': [correct_perc],
    'false_pred': [wrong_perc]
    })
    return(df_perf)

## Make predictions for many models and trip descriptions

Provide a list of candidate models and apply them to the test data.

In [5]:
# List of Hugging Face model names
model_names = [
    "facebook/bart-large-mnli",
    "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
    ##"cross-encoder/nli-deberta-v3-base",
    "cross-encoder/nli-deberta-v3-large",
    "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
    ##"joeddav/bart-large-mnli-yahoo-answers",
    "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli",
    "MoritzLaurer/deberta-v3-large-zeroshot-v2.0",
    "valhalla/distilbart-mnli-12-1",
    #"joeddav/xlm-roberta-large-xnli" # keeps giving errors
]

# Apply each model to the test data
for model_name in model_names:
    print(f"\nUsing model: {model_name}")
    result_list = []
    performance = pd.DataFrame(columns=['accuracy', 'true_ident', 'false_pred'])
    
    start_time = time.time()
    for i in range(len(trip_descriptions)):
        current_trip = trip_descriptions[i]
        current_type = trip_types[i]
        df = pred_trip(model_name, current_trip, current_type, cut_off = 0.5)
        print(df)
        # accuracy, perc true classes identified and perc wrong pred classes
        performance = pd.concat([performance, perf_measure(df)])
        print(performance)
        
        result_list.append(df)
    end_time = time.time()
    elapsed_time = end_time - start_time
    # Extract "same_value" column from each DataFrame
    sv_columns = [df['same_value'] for df in result_list]  # 'same' needs to be changed
    sv_columns.insert(0, result_list[0]['superclass'])
    # Combine into a new DataFrame (columns side-by-side)
    sv_df = pd.concat(sv_columns, axis=1)
    print(sv_df)
    # Compute accuracy per superclass (row means of same_value matrix excluding the first column)
    row_means = sv_df.iloc[:, 1:].mean(axis=1)
    df_row_means = pd.DataFrame({
        'superclass': sv_df['superclass'],
        'accuracy': row_means
    })
    print(df_row_means)
    # Compute performance measures per trip (mean for each column of performance table)
    column_means = performance.mean()
    print(column_means)
    # save results
    model = model_name.replace("/", "-")
    model_result = {
        'model': model,
        'predictions': result_list,
        'performance': performance,
        'perf_summary': column_means,
        'perf_superclass': df_row_means,
        'elapsed_time': elapsed_time
    }
    # File path with folder
    filename = os.path.join('results', f'{model}_results.pkl')
    # Save the object
    with open(filename, 'wb') as f:
        pickle.dump(model_result, f)






Using model: cross-encoder/nli-deberta-v3-base


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands.', 'labels': ['micro-adventure / weekend trip', 'digital nomad trip', 'beach vacation', 'festival trip', 'city trip', 'cultural exploration', 'road trip (car/camper)', 'camping trip (wild camping)', 'long-distance hike / thru-hike', 'hut trek (winter)', 'ski tour / skitour', 'snowboard / splitboard trip', 'nature escape', 'yoga / wellness retreat', 'hut trek (summer)', 'camping trip (campground)'], 'scores': [0.9722680449485779, 0.007802918087691069, 0.0075571718625724316, 0.0022959215566515923, 0.0021305829286575317, 0.001222927705384791, 0.0009879637509584427, 0.0008052966441

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We are a couple in our thirties traveling to Vienna for a three-day city trip. We’ll be staying at a friend’s house and plan to explore the city by sightseeing, strolling through the streets, visiting markets, and trying out great restaurants and cafés. We also hope to attend a classical music concert. Our journey to Vienna will be by train.', 'labels': ['city trip', 'micro-adventure / weekend trip', 'festival trip', 'digital nomad trip', 'cultural exploration', 'ski tour / skitour', 'hut trek (summer)', 'hut trek (winter)', 'camping trip (campground)', 'long-distance hike / thru-hike', 'beach vacation', 'snowboard / splitboard trip', 'camping trip (wild camping)', 'road trip (car/camper)', 'nature escape', 'yoga / wellness retreat'], 'scores': [0.7273069024085999, 0.16893576085567474, 0.0374605655670166, 0.013699190691113472, 0.010825436562299728, 0.009758710861206055, 0.007331428118050098, 0.004068635869771242, 0.003749603172764182, 0.003352535655722022, 0.0031235795468

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'My partner and I are traveling to the Netherlands and Germany to spend Christmas with our family. We are in our late twenties and will start our journey with a two-hour flight to the Netherlands. From there, we will take a 5.5-hour train ride to northern Germany.', 'labels': ['city trip', 'ski tour / skitour', 'micro-adventure / weekend trip', 'festival trip', 'cultural exploration', 'road trip (car/camper)', 'long-distance hike / thru-hike', 'digital nomad trip', 'camping trip (campground)', 'hut trek (winter)', 'beach vacation', 'nature escape', 'snowboard / splitboard trip', 'camping trip (wild camping)', 'hut trek (summer)', 'yoga / wellness retreat'], 'scores': [0.45239609479904175, 0.39182814955711365, 0.03087054006755352, 0.021208807826042175, 0.019435159862041473, 0.01809830032289028, 0.015263390727341175, 0.014824162237346172, 0.01339301560074091, 0.008623503148555756, 0.0034696790389716625, 0.0033059841953217983, 0.0026981434784829617, 0.0023115249350667, 0.0014

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I’m in my twenties and will be traveling to Peru for three weeks. I’m going solo but will meet up with a friend to explore the Sacred Valley and take part in a Machu Picchu tour. We plan to hike, go rafting, and explore the remnants of the ancient Inca Empire. We’re also excited to try Peruvian cuisine and immerse ourselves in the local culture. Depending on our plans, we might also visit the rainforest region, such as Tarapoto. I’ll be flying to Peru on a long-haul flight and will be traveling in August.', 'labels': ['long-distance hike / thru-hike', 'cultural exploration', 'micro-adventure / weekend trip', 'ski tour / skitour', 'camping trip (campground)', 'festival trip', 'digital nomad trip', 'hut trek (summer)', 'camping trip (wild camping)', 'city trip', 'beach vacation', 'nature escape', 'yoga / wellness retreat', 'hut trek (winter)', 'road trip (car/camper)', 'snowboard / splitboard trip'], 'scores': [0.6391688585281372, 0.1992405354976654, 0.09796122461557388, 0.

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We’re planning a 10-day trip to Austria in the summer, combining hiking with relaxation by the lake. We love exploring scenic trails and enjoying the outdoors, but we also want to unwind and swim in the lake. It’s the perfect mix of adventure and relaxation.', 'labels': ['micro-adventure / weekend trip', 'hut trek (summer)', 'cultural exploration', 'camping trip (campground)', 'ski tour / skitour', 'camping trip (wild camping)', 'long-distance hike / thru-hike', 'festival trip', 'nature escape', 'digital nomad trip', 'city trip', 'yoga / wellness retreat', 'road trip (car/camper)', 'beach vacation', 'snowboard / splitboard trip', 'hut trek (winter)'], 'scores': [0.399262934923172, 0.11123846471309662, 0.10382843762636185, 0.09988072514533997, 0.04976071044802666, 0.042101696133613586, 0.039874546229839325, 0.029952242970466614, 0.027864206582307816, 0.025919852778315544, 0.024940188974142075, 0.020264005288481712, 0.011593679897487164, 0.01072726957499981, 0.0018079130677

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I am going on a multiple day hike and passing though mountains and the beach in Croatia. I like to pack light and will stay in refugios/huts with half board and travel to the start of the hike by car. It will be 6-7 days.', 'labels': ['long-distance hike / thru-hike', 'snowboard / splitboard trip', 'road trip (car/camper)', 'micro-adventure / weekend trip', 'digital nomad trip', 'hut trek (summer)', 'cultural exploration', 'beach vacation', 'nature escape', 'ski tour / skitour', 'camping trip (wild camping)', 'festival trip', 'yoga / wellness retreat', 'hut trek (winter)', 'camping trip (campground)', 'city trip'], 'scores': [0.307785302400589, 0.21902230381965637, 0.2059311717748642, 0.0918341726064682, 0.0317988395690918, 0.022238966077566147, 0.02077283337712288, 0.016396038234233856, 0.016088521108031273, 0.015115255489945412, 0.011458033695816994, 0.011041054502129555, 0.009128374978899956, 0.008595850318670273, 0.00814863946288824, 0.004644663538783789]}
long-distan

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I will go with a friend on a beach holiday and we will do stand-up paddling, and surfing in the North of Spain. The destination is windy and can get cold, but is generally sunny. We will go by car and bring a tent to sleep in. It will be two weeks.', 'labels': ['beach vacation', 'road trip (car/camper)', 'camping trip (campground)', 'micro-adventure / weekend trip', 'camping trip (wild camping)', 'festival trip', 'long-distance hike / thru-hike', 'digital nomad trip', 'ski tour / skitour', 'city trip', 'snowboard / splitboard trip', 'nature escape', 'hut trek (summer)', 'hut trek (winter)', 'yoga / wellness retreat', 'cultural exploration'], 'scores': [0.514082670211792, 0.21859246492385864, 0.17132116854190826, 0.07659809291362762, 0.00910242274403572, 0.002291250042617321, 0.0018519052537158132, 0.0013648553285747766, 0.0011585818137973547, 0.00088925426825881, 0.0006727487780153751, 0.0005743220681324601, 0.0004519206704571843, 0.00041462210356257856, 0.000366220221621

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We will go to Sweden in the winter, to go for a yoga and sauna/wellness retreat. I prefer lightweight packing and also want clothes to go for fancy dinners and maybe on a winter hike. We stay in hotels.', 'labels': ['yoga / wellness retreat', 'micro-adventure / weekend trip', 'ski tour / skitour', 'cultural exploration', 'city trip', 'nature escape', 'digital nomad trip', 'festival trip', 'long-distance hike / thru-hike', 'camping trip (campground)', 'hut trek (winter)', 'camping trip (wild camping)', 'road trip (car/camper)', 'snowboard / splitboard trip', 'beach vacation', 'hut trek (summer)'], 'scores': [0.764270544052124, 0.14882460236549377, 0.01460289116948843, 0.013516460545361042, 0.01233332883566618, 0.01110365241765976, 0.008976204320788383, 0.008688248693943024, 0.0077370391227304935, 0.0027588019147515297, 0.002010364318266511, 0.0018135884311050177, 0.0010747710475698113, 0.0009247296256944537, 0.0008900162065401673, 0.0004748118226416409]}
yoga / wellness re

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I go on a skitouring trip where we also make videos/photos and the destination is Japan. Mainly sports clothes and isolation are needed (it is winter). We stay in a guesthouse. It will be 10 days.', 'labels': ['ski tour / skitour', 'micro-adventure / weekend trip', 'cultural exploration', 'digital nomad trip', 'nature escape', 'city trip', 'festival trip', 'long-distance hike / thru-hike', 'hut trek (winter)', 'camping trip (campground)', 'snowboard / splitboard trip', 'camping trip (wild camping)', 'yoga / wellness retreat', 'beach vacation', 'hut trek (summer)', 'road trip (car/camper)'], 'scores': [0.7912495136260986, 0.18457308411598206, 0.006496830843389034, 0.005906335078179836, 0.0022365122567862272, 0.0019783126190304756, 0.001958322012796998, 0.0015622376231476665, 0.0010011475533246994, 0.0008931723423302174, 0.0008437229553237557, 0.0006696251803077757, 0.0003258714859839529, 0.0001465948298573494, 8.767224062466994e-05, 7.109773287083954e-05]}
ski tour / skito

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We plan a wild camping trip with activities such as snorkeling, kayaking and canoeing. It is a warm place and we want to bring little stuff. We stay in tents and hammocks and travel with a car, it will be 3 days.', 'labels': ['camping trip (wild camping)', 'camping trip (campground)', 'road trip (car/camper)', 'micro-adventure / weekend trip', 'beach vacation', 'cultural exploration', 'digital nomad trip', 'festival trip', 'nature escape', 'hut trek (summer)', 'ski tour / skitour', 'city trip', 'yoga / wellness retreat', 'long-distance hike / thru-hike', 'hut trek (winter)', 'snowboard / splitboard trip'], 'scores': [0.4519400894641876, 0.28106340765953064, 0.13299500942230225, 0.119535893201828, 0.006582783069461584, 0.0017220464069396257, 0.001517383847385645, 0.0014019669033586979, 0.001203698804602027, 0.0011794682359322906, 0.0002996847906615585, 0.00015913322567939758, 0.00015873221855144948, 0.00010613033373374492, 7.342447497649118e-05, 6.111798575147986e-05]}
cam

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands.', 'labels': ['cultural exploration', 'micro-adventure / weekend trip', 'beach vacation', 'city trip', 'road trip (car/camper)', 'long-distance hike / thru-hike', 'festival trip', 'nature escape', 'digital nomad trip', 'ski tour / skitour', 'hut trek (summer)', 'camping trip (campground)', 'camping trip (wild camping)', 'hut trek (winter)', 'snowboard / splitboard trip', 'yoga / wellness retreat'], 'scores': [0.16716161370277405, 0.12716785073280334, 0.1101449728012085, 0.08154628425836563, 0.06655469536781311, 0.05304424837231636, 0.04946322366595268, 0.0479649193584919, 0.047

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We are a couple in our thirties traveling to Vienna for a three-day city trip. We’ll be staying at a friend’s house and plan to explore the city by sightseeing, strolling through the streets, visiting markets, and trying out great restaurants and cafés. We also hope to attend a classical music concert. Our journey to Vienna will be by train.', 'labels': ['city trip', 'cultural exploration', 'micro-adventure / weekend trip', 'road trip (car/camper)', 'festival trip', 'ski tour / skitour', 'long-distance hike / thru-hike', 'digital nomad trip', 'nature escape', 'hut trek (summer)', 'beach vacation', 'snowboard / splitboard trip', 'hut trek (winter)', 'camping trip (campground)', 'camping trip (wild camping)', 'yoga / wellness retreat'], 'scores': [0.25980284810066223, 0.16085410118103027, 0.11896767467260361, 0.05226515233516693, 0.050702739506959915, 0.046058136969804764, 0.0418943352997303, 0.04128628969192505, 0.031528014689683914, 0.030673017725348473, 0.029140915721654

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'My partner and I are traveling to the Netherlands and Germany to spend Christmas with our family. We are in our late twenties and will start our journey with a two-hour flight to the Netherlands. From there, we will take a 5.5-hour train ride to northern Germany.', 'labels': ['festival trip', 'cultural exploration', 'city trip', 'micro-adventure / weekend trip', 'hut trek (winter)', 'road trip (car/camper)', 'ski tour / skitour', 'digital nomad trip', 'hut trek (summer)', 'beach vacation', 'long-distance hike / thru-hike', 'snowboard / splitboard trip', 'camping trip (campground)', 'nature escape', 'camping trip (wild camping)', 'yoga / wellness retreat'], 'scores': [0.293078750371933, 0.16062624752521515, 0.06345170736312866, 0.06158151477575302, 0.056430768221616745, 0.04617263004183769, 0.040666013956069946, 0.03946645185351372, 0.03829963505268097, 0.03487568348646164, 0.03402161970734596, 0.02967722713947296, 0.02956923469901085, 0.027611277997493744, 0.0257157292217

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I’m in my twenties and will be traveling to Peru for three weeks. I’m going solo but will meet up with a friend to explore the Sacred Valley and take part in a Machu Picchu tour. We plan to hike, go rafting, and explore the remnants of the ancient Inca Empire. We’re also excited to try Peruvian cuisine and immerse ourselves in the local culture. Depending on our plans, we might also visit the rainforest region, such as Tarapoto. I’ll be flying to Peru on a long-haul flight and will be traveling in August.', 'labels': ['cultural exploration', 'micro-adventure / weekend trip', 'city trip', 'long-distance hike / thru-hike', 'road trip (car/camper)', 'festival trip', 'beach vacation', 'nature escape', 'digital nomad trip', 'ski tour / skitour', 'hut trek (summer)', 'camping trip (wild camping)', 'camping trip (campground)', 'hut trek (winter)', 'snowboard / splitboard trip', 'yoga / wellness retreat'], 'scores': [0.2301025688648224, 0.09925426542758942, 0.08985505998134613, 0

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We’re planning a 10-day trip to Austria in the summer, combining hiking with relaxation by the lake. We love exploring scenic trails and enjoying the outdoors, but we also want to unwind and swim in the lake. It’s the perfect mix of adventure and relaxation.', 'labels': ['micro-adventure / weekend trip', 'nature escape', 'cultural exploration', 'long-distance hike / thru-hike', 'hut trek (summer)', 'road trip (car/camper)', 'ski tour / skitour', 'beach vacation', 'camping trip (wild camping)', 'yoga / wellness retreat', 'camping trip (campground)', 'city trip', 'festival trip', 'digital nomad trip', 'snowboard / splitboard trip', 'hut trek (winter)'], 'scores': [0.1256595253944397, 0.11564762890338898, 0.0818299949169159, 0.0732298418879509, 0.06507544964551926, 0.06311747431755066, 0.06167558953166008, 0.05981043726205826, 0.05292873829603195, 0.052205272018909454, 0.0516391284763813, 0.05140812322497368, 0.04123023897409439, 0.03586934134364128, 0.03446359559893608, 0.0

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I am going on a multiple day hike and passing though mountains and the beach in Croatia. I like to pack light and will stay in refugios/huts with half board and travel to the start of the hike by car. It will be 6-7 days.', 'labels': ['long-distance hike / thru-hike', 'micro-adventure / weekend trip', 'nature escape', 'camping trip (wild camping)', 'road trip (car/camper)', 'cultural exploration', 'beach vacation', 'ski tour / skitour', 'camping trip (campground)', 'snowboard / splitboard trip', 'hut trek (summer)', 'city trip', 'hut trek (winter)', 'digital nomad trip', 'festival trip', 'yoga / wellness retreat'], 'scores': [0.20863620936870575, 0.09685744345188141, 0.07517095655202866, 0.07343076914548874, 0.0706576257944107, 0.06391564011573792, 0.06255688518285751, 0.053629063069820404, 0.050872139632701874, 0.04593609273433685, 0.03691982477903366, 0.03505941480398178, 0.03402595594525337, 0.032621342688798904, 0.030087171122431755, 0.029623446986079216]}
long-distan

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I will go with a friend on a beach holiday and we will do stand-up paddling, and surfing in the North of Spain. The destination is windy and can get cold, but is generally sunny. We will go by car and bring a tent to sleep in. It will be two weeks.', 'labels': ['beach vacation', 'micro-adventure / weekend trip', 'road trip (car/camper)', 'nature escape', 'cultural exploration', 'camping trip (wild camping)', 'long-distance hike / thru-hike', 'camping trip (campground)', 'festival trip', 'ski tour / skitour', 'snowboard / splitboard trip', 'hut trek (summer)', 'city trip', 'hut trek (winter)', 'digital nomad trip', 'yoga / wellness retreat'], 'scores': [0.23744942247867584, 0.10603270679712296, 0.06524186581373215, 0.06422917544841766, 0.06201164796948433, 0.0533960796892643, 0.05292251333594322, 0.04764849692583084, 0.04592280834913254, 0.044455721974372864, 0.04425935447216034, 0.042198047041893005, 0.03631977364420891, 0.034582093358039856, 0.03345293551683426, 0.029877

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We will go to Sweden in the winter, to go for a yoga and sauna/wellness retreat. I prefer lightweight packing and also want clothes to go for fancy dinners and maybe on a winter hike. We stay in hotels.', 'labels': ['yoga / wellness retreat', 'micro-adventure / weekend trip', 'nature escape', 'long-distance hike / thru-hike', 'cultural exploration', 'ski tour / skitour', 'hut trek (winter)', 'city trip', 'beach vacation', 'road trip (car/camper)', 'hut trek (summer)', 'festival trip', 'camping trip (wild camping)', 'snowboard / splitboard trip', 'camping trip (campground)', 'digital nomad trip'], 'scores': [0.2606178820133209, 0.08046605437994003, 0.06843063235282898, 0.059995006769895554, 0.059469204396009445, 0.05855844169855118, 0.055849116295576096, 0.047889091074466705, 0.045752931386232376, 0.04353173449635506, 0.04008011892437935, 0.038273148238658905, 0.03779057413339615, 0.03708784282207489, 0.03450765833258629, 0.03170054778456688]}
yoga / wellness retreat
0
{'s

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'I go on a skitouring trip where we also make videos/photos and the destination is Japan. Mainly sports clothes and isolation are needed (it is winter). We stay in a guesthouse. It will be 10 days.', 'labels': ['ski tour / skitour', 'micro-adventure / weekend trip', 'cultural exploration', 'road trip (car/camper)', 'city trip', 'snowboard / splitboard trip', 'long-distance hike / thru-hike', 'digital nomad trip', 'hut trek (winter)', 'festival trip', 'hut trek (summer)', 'camping trip (wild camping)', 'nature escape', 'camping trip (campground)', 'beach vacation', 'yoga / wellness retreat'], 'scores': [0.1734267771244049, 0.09546443074941635, 0.08052344620227814, 0.07985977828502655, 0.06759097427129745, 0.06277172267436981, 0.054686594754457474, 0.05254476144909859, 0.0503639318048954, 0.044427838176488876, 0.043578390032052994, 0.043481748551130295, 0.04278255254030228, 0.04047577455639839, 0.038092684000730515, 0.029928630217909813]}
ski tour / skitour
0
{'sequence': 'I

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'sequence': 'We plan a wild camping trip with activities such as snorkeling, kayaking and canoeing. It is a warm place and we want to bring little stuff. We stay in tents and hammocks and travel with a car, it will be 3 days.', 'labels': ['camping trip (wild camping)', 'camping trip (campground)', 'micro-adventure / weekend trip', 'road trip (car/camper)', 'nature escape', 'beach vacation', 'long-distance hike / thru-hike', 'cultural exploration', 'festival trip', 'ski tour / skitour', 'hut trek (summer)', 'city trip', 'hut trek (winter)', 'snowboard / splitboard trip', 'digital nomad trip', 'yoga / wellness retreat'], 'scores': [0.24850936233997345, 0.14938758313655853, 0.09667350351810455, 0.08339172601699829, 0.05508505925536156, 0.051402922719717026, 0.044685814529657364, 0.03972415253520012, 0.03578963875770569, 0.03319094330072403, 0.03277123346924782, 0.029747819527983665, 0.02797492779791355, 0.027441974729299545, 0.023192668333649635, 0.021030662581324577]}
camping trip (wild

## Load and compare results

In [14]:
# Folder where .pkl files are saved
results_dir = 'results/before'

# Dictionary to store all loaded results
all_results = {}

# Loop through all .pkl files in the folder
for filename in os.listdir(results_dir):
    if filename.endswith('.pkl'):
        model_name = filename.replace('_results.pkl', '')  # Extract model name
        file_path = os.path.join(results_dir, filename)
        
        # Load the result
        with open(file_path, 'rb') as f:
            result = pickle.load(f)
            all_results[model_name] = result

# Compare performance across models
for model, data in all_results.items():
    print(f"Model: {model}")
    print(f"Performance Summary:\n{data['perf_summary']}")
    print("-" * 40)

# Compare performance across models
for model, data in all_results.items():
    print(f"Model: {model}")
    print(f"Performance Summary:\n{data['perf_superclass']}")
    print("-" * 40)

Model: cross-encoder-nli-deberta-v3-base
Performance Summary:
accuracy      0.444444
true_ident    0.533333
false_pred    0.712500
dtype: float64
----------------------------------------
Model: joeddav-bart-large-mnli-yahoo-answers
Performance Summary:
accuracy      0.344444
true_ident    0.650000
false_pred    0.553792
dtype: float64
----------------------------------------
Model: cross-encoder-nli-deberta-v3-large
Performance Summary:
accuracy      0.466667
true_ident    0.566667
false_pred    0.541667
dtype: float64
----------------------------------------
Model: MoritzLaurer-DeBERTa-v3-large-mnli-fever-anli-ling-wanli
Performance Summary:
accuracy      0.566667
true_ident    0.841667
false_pred    0.546667
dtype: float64
----------------------------------------
Model: MoritzLaurer-mDeBERTa-v3-base-mnli-xnli
Performance Summary:
accuracy      0.466667
true_ident    0.408333
false_pred    0.481250
dtype: float64
----------------------------------------
Model: MoritzLaurer-deberta-v3-

**Identify trips that are difficult to predict**

Per model

In [15]:
def get_difficult_trips(model_result, cut_off = 0.6):
    # model_result is a dict with dict_keys(['model', 'predictions', 
    # 'performance', 'perf_summary', 'perf_superclass', 'elapsed_time'])
    # get performance dataframe and repair index
    df = model_result['performance'].reset_index(drop=True)
    # find index of trips whose accuracy is below cut_off
    index_result = df[df['accuracy'] < cut_off].index
    return(index_result)

# dictionary of trips that have accuracy below cut_off default
difficult_trips_dict = {}
for model, data in all_results.items():
    difficult_trips_dict[data["model"]] = get_difficult_trips(data)

for key, value in difficult_trips_dict.items():
    print(f"{key}: {value}\n")

cross-encoder-nli-deberta-v3-base: Index([0, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

joeddav-bart-large-mnli-yahoo-answers: RangeIndex(start=0, stop=10, step=1)

cross-encoder-nli-deberta-v3-large: Index([0, 1, 2, 3, 4, 6, 7, 8, 9], dtype='int64')

MoritzLaurer-DeBERTa-v3-large-mnli-fever-anli-ling-wanli: Index([2, 5, 6, 7, 8, 9], dtype='int64')

MoritzLaurer-mDeBERTa-v3-base-mnli-xnli: RangeIndex(start=0, stop=10, step=1)

MoritzLaurer-deberta-v3-large-zeroshot-v2.0: Index([0, 1, 2, 3, 4, 5, 6, 7, 9], dtype='int64')

facebook-bart-large-mnli: RangeIndex(start=0, stop=10, step=1)

valhalla-distilbart-mnli-12-1: Index([0, 1, 2, 3, 4, 7, 9], dtype='int64')

MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli: Index([0, 2, 3, 4, 6, 7], dtype='int64')



For all models

In [16]:
# Which trips are difficult for all models
common = set.intersection(*(set(v) for v in difficult_trips_dict.values()))
for index in common:
    print(index, ".", trip_descriptions[index], "\n")
    for item in trip_types[index]:
        print(item)
    print("\n")

2 . My partner and I are traveling to the Netherlands and Germany to spend Christmas with our family. We are in our late twenties and will start our journey with a two-hour flight to the Netherlands. From there, we will take a 5.5-hour train ride to northern Germany. 

city trip
['relaxing']
cold destination / winter
lightweight (but comfortable)
casual
indoor
no own vehicle
no special conditions to consider
7+ days


7 . We will go to Sweden in the winter, to go for a yoga and sauna/wellness retreat. I prefer lightweight packing and also want clothes to go for fancy dinners and maybe on a winter hike. We stay in hotels. 

yoga / wellness retreat
['hut-to-hut hiking', 'yoga']
cold destination / winter
lightweight (but comfortable)
formal (business trip)
sleeping in a tent
no own vehicle
avalanche-prone terrain
7 days




**Identify superclasses that are difficult to predict**

Per model

In [17]:
def get_difficult_superclasses(model_result, cut_off = 0.6):
    # model_result is a dict with dict_keys(['model', 'predictions', 
    # 'performance', 'perf_summary', 'perf_superclass', 'elapsed_time'])
    df = model_result["perf_superclass"]
    # find superclass whose accuracy is below cut_off
    diff_spc = list(df[df['accuracy'] < cut_off]["superclass"])
    return(diff_spc)

# make dictionary of superclasses that have accuracy below cut_off default
difficult_superclass_dict = {}
for model, data in all_results.items():
    difficult_superclass_dict[data["model"]] = get_difficult_superclasses(data)

for key, value in difficult_superclass_dict.items():
    print(f"{key}: {value}\n")

cross-encoder-nli-deberta-v3-base: ['activities', 'climate_or_season', 'style_or_comfort', 'special_conditions']

joeddav-bart-large-mnli-yahoo-answers: ['activities', 'climate_or_season', 'style_or_comfort', 'dress_code', 'accommodation', 'transportation', 'special_conditions']

cross-encoder-nli-deberta-v3-large: ['activities', 'climate_or_season', 'style_or_comfort', 'transportation', 'special_conditions']

MoritzLaurer-DeBERTa-v3-large-mnli-fever-anli-ling-wanli: ['activities', 'style_or_comfort', 'special_conditions']

MoritzLaurer-mDeBERTa-v3-base-mnli-xnli: ['activities', 'style_or_comfort', 'accommodation', 'special_conditions', 'trip_length_days']

MoritzLaurer-deberta-v3-large-zeroshot-v2.0: ['activities', 'climate_or_season', 'style_or_comfort', 'accommodation', 'special_conditions']

facebook-bart-large-mnli: ['activities', 'style_or_comfort', 'accommodation', 'special_conditions']

valhalla-distilbart-mnli-12-1: ['activities', 'style_or_comfort', 'accommodation', 'special_

For all models

In [18]:
# Which trips are difficult for all models
common = set.intersection(*(set(v) for v in difficult_superclass_dict.values()))
print(common)

{'style_or_comfort', 'activities', 'special_conditions'}


In [19]:
# Look at particular predicitons in detail
# print(all_results["joeddav-bart-large-mnli-yahoo-answers"])

**Comparing models**

In [24]:
# Make table of 'perf_summary' for all models inlcude time elapsed
print(type(all_results))
print(type(all_results["MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli"]))
print(all_results["MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli"].keys())
print(type(all_results["MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli"]["perf_summary"]))
print(all_results["MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli"]["perf_summary"])
print(all_results["MoritzLaurer-DeBERTa-v3-base-mnli-fever-anli"]["perf_summary"]["accuracy"])
# make empty data frame
# fill in for loop with perf_summary per model


# Make ranking from that table for each category


<class 'dict'>
<class 'dict'>
dict_keys(['model', 'predictions', 'performance', 'perf_summary', 'perf_superclass', 'elapsed_time'])
<class 'pandas.core.series.Series'>
accuracy      0.522222
true_ident    0.841667
false_pred    0.572381
dtype: float64
0.5222222222222223


# Use gradio for user input

In [66]:
# use model with gradio
from transformers import pipeline
import gradio as gr

# make a function for what I am doing
def classify(text):
    df = pd.DataFrame(columns=['Superclass', 'class'])
    for i, key in enumerate(keys_list):
        # Run the classification (ca 30 seconds classifying)
        if key == 'activities':
            result = classifier(text, candidate_labels[key], multi_label=True)
            classes = [result['labels'][i] for i in indices]
        else:
            result = classifier(text, candidate_labels[key])
            classes = result["labels"][0]
        print(i)
        df.loc[i] = [key, classes]

    return df

demo = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="dataframe",
    title="Zero-Shot Classification",
    description="Enter a text describing your trip",
)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://aa06d5d85ffadaa92b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


0
1
2
3
4
5
6
7
8
0
1
2
3
4
5
6
7
8


Use model with gradio

In [4]:
# Define the Gradio interface
def classify(text):
    return classifier(text, class_labels)

demo = gr.Interface(
    fn=classify,
    inputs="text",
    outputs="json",
    title="Zero-Shot Classification",
    description="Enter a text describing your trip",
)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://0f70ba5369d721cf8f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
