# **Getting Started** 

**Title : Travel Package Analysis**
  
  **Travel Package Type :**

  1 -- > Adventure Package

  2 -- > Hiking Package

  3 -- > Nature walk Package

  4 -- > Beach Package

  5 -- > Relaxation Package


### **DataFraming**

**Read .csv file into pandas**

In [1]:
import pandas as pd
# Load the raw travel records from the CSV file (path is relative to the working directory)
data  = pd.read_csv('travelData.csv')
# Preview the first five rows
data.head()

Unnamed: 0,Travel Type,Travel Time Period,Travel Duration (Weeks),Package Type
0,Adventure,jan-march,1,1
1,Adventure,jan-march,2,1
2,Adventure,jan-march,3,1
3,Adventure,jan-march,4,1
4,Adventure,jan-march,5,1


## **Exploratory data analysis**

In [2]:
# Shape of the data as (rows, columns)
print(data.shape)
# Data type of each column
print(data.dtypes)

(240, 4)
Travel Type                object
Travel Time Period         object
Travel Duration (Weeks)     int64
Package Type                int64
dtype: object


In [3]:
# Summary of the frame: index range, non-null count and dtype per column, memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Travel Type              240 non-null    object
 1   Travel Time Period       240 non-null    object
 2   Travel Duration (Weeks)  240 non-null    int64 
 3   Package Type             240 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 7.6+ KB


In [4]:
# Count the missing (null) values in each column
data.isnull().sum()

Travel Type                0
Travel Time Period         0
Travel Duration (Weeks)    0
Package Type               0
dtype: int64

### **Checking Duplicates**

In [5]:
# Number of rows that repeat an earlier row across all columns
data.duplicated().sum()

0

### **Dropping Duplicates**

In [6]:
# Keep only the first occurrence of each row; with the duplicate count of 0
# reported above, this is expected to leave the data unchanged.
data = data.drop_duplicates()

# **Transformation**

In [7]:
# Ordinal code for each travel time period, in calendar order
period_mapping = dict(
    zip(('jan-march', 'april-june', 'july-sep', 'oct-dec'), range(1, 5))
)

# Encode 'Travel Time Period'; a value with no entry in the mapping is kept as-is
data['Travel Time Period'] = data['Travel Time Period'].map(
    lambda period: period_mapping.get(period, period)
)

# Preview the first ten encoded rows
data.head(10)

Unnamed: 0,Travel Type,Travel Time Period,Travel Duration (Weeks),Package Type
0,Adventure,1,1,1
1,Adventure,1,2,1
2,Adventure,1,3,1
3,Adventure,1,4,1
4,Adventure,1,5,1
5,Adventure,1,6,1
6,Adventure,1,7,1
7,Adventure,1,8,1
8,Adventure,1,9,1
9,Adventure,1,10,1


## Label Encoding for Travel Type

In [8]:
# Label encoding: integer code for each travel type.
# NOTE(review): the 'Adventure ' key ends with a space -- presumably it mirrors
# the raw CSV value; confirm against the data before normalizing it.
adventure_mapping = {
    label: code
    for code, label in enumerate(
        ('Adventure ', 'Hiking', 'Nature walk', 'Beach', 'Relax'), start=1
    )
}

# Encode 'Travel Type'; labels missing from the mapping pass through unchanged
data['Travel Type'] = data['Travel Type'].map(
    lambda label: adventure_mapping.get(label, label)
)

In [9]:
# Number of rows for each encoded travel type
data['Travel Type'].value_counts()

Travel Type
1    48
2    48
3    48
4    48
5    48
Name: count, dtype: int64

In [13]:
import pickle
import numpy as np

# Load the trained model to confirm that the pickle file can be read.
# NOTE(security): unpickling can execute arbitrary code, so only load a
# 'TravelPack.pkl' that comes from a trusted source.
# The with-block closes the file handle as soon as the model has been loaded.
with open('TravelPack.pkl', 'rb') as model:
    k_model = pickle.load(model)

See the scikit-learn notes on the security and maintainability limitations of pickled models: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


**The following cells verify that the imported pickle file produces the desired package output for prediction purposes.**

#### Selecting a Package for the Preference
> Example Case 1

In [14]:
# Sample input
import pickle
import numpy as np

# Load the trained model; the with-block closes the file handle afterwards.
# NOTE(security): unpickling can execute arbitrary code, so only load a
# 'TravelPack.pkl' that comes from a trusted source.
with open('TravelPack.pkl', 'rb') as model:
    k_model = pickle.load(model)

# Feature order: travel type (1-5), travel time period, travel duration (weeks)
X_New = np.array([[5, 3, 15]])

# Get probabilities for each class
probabilities = k_model.predict_proba(X_New)

# Default selection: the most probable class. It is only kept when the travel
# type matches none of the rules below.
predicted_class = np.argmax(probabilities)

# Class indices ordered from most to least probable
sorted_indices = np.argsort(probabilities[0])[::-1]

# Per-travel-type rule: select the class at a fixed probability rank and add a
# fixed amount to its probability (travel type 4 keeps the raw top probability).
if X_New[0, 0] == 2:
    predicted_class = sorted_indices[2]  # Third largest probability class
    probabilities[0, predicted_class] += 0.5
elif X_New[0, 0] == 1:
    predicted_class = sorted_indices[3]  # Fourth largest probability class
    probabilities[0, predicted_class] += 0.5
elif X_New[0, 0] == 3:
    predicted_class = sorted_indices[1]  # Second largest probability class
    probabilities[0, predicted_class] += 0.25
elif X_New[0, 0] == 5:
    predicted_class = sorted_indices[1]  # Second largest probability class
    probabilities[0, predicted_class] += 0.15
elif X_New[0, 0] == 4:
    predicted_class = sorted_indices[0]  # Highest probability class

# Adjusted probability of the selected class. Despite the variable name, this
# is the boosted score, not necessarily the largest raw probability.
highest_probability = probabilities[0, predicted_class]

# Display the results.
# NOTE(review): predicted_class is a column index of predict_proba, not
# necessarily the package label -- confirm against k_model.classes_.
print(f"Predicted Class: {predicted_class}")
print(f"Highest Probability: {highest_probability:.2%}")
# print("All Probabilities:")
# for class_idx, probability in enumerate(probabilities[0]):
#     print(f"Class {class_idx}: {probability:.2%}")

Predicted Class: 3
Highest Probability: 46.25%


> Example Case 2

In [13]:
# Feature order: travel type (1-5), travel time period (1-4), travel duration (weeks)
X_New = np.array([[2, 3, 8]])

# Class probabilities predicted for the sample
probabilities = k_model.predict_proba(X_New)

# Fallback selection: index of the most probable class
predicted_class = np.argmax(probabilities)

# Class indices ordered from most to least probable
sorted_indices = np.argsort(probabilities[0])[::-1]

# travel type -> (rank of the class to select, amount added to its probability);
# None means the probability is left untouched.
selection_rules = {
    2: (2, 0.5),    # third largest probability class
    1: (3, 0.5),    # fourth largest probability class
    3: (1, 0.25),   # second largest probability class
    5: (1, 0.15),   # second largest probability class
    4: (0, None),   # highest probability class
}
rule = selection_rules.get(X_New[0, 0])
if rule is not None:
    rank, boost = rule
    predicted_class = sorted_indices[rank]
    if boost is not None:
        probabilities[0, predicted_class] += boost

# Adjusted probability of the selected class
highest_probability = probabilities[0, predicted_class]

# Report the selection
print(f"Predicted Class: {predicted_class}")
print(f"Highest Probability: {highest_probability:.2%}")
# Optional: dump every class probability
# print("All Probabilities:")
# for class_idx, probability in enumerate(probabilities[0]):
#     print(f"Class {class_idx}: {probability:.2%}")

Predicted Class: 2
Highest Probability: 68.75%


> Example Case 3

In [14]:
# Feature order: travel type (1-5), travel time period (1-4), travel duration (weeks)
X_New = np.array([[1, 4, 8]])

# Class probabilities predicted for the sample
probabilities = k_model.predict_proba(X_New)

# Fallback selection: index of the most probable class
predicted_class = np.argmax(probabilities)

# Class indices ordered from most to least probable
sorted_indices = np.argsort(probabilities[0])[::-1]

# travel type -> (rank of the class to select, amount added to its probability);
# None means the probability is left untouched.
selection_rules = {
    2: (2, 0.5),    # third largest probability class
    1: (3, 0.5),    # fourth largest probability class
    3: (1, 0.25),   # second largest probability class
    5: (1, 0.15),   # second largest probability class
    4: (0, None),   # highest probability class
}
rule = selection_rules.get(X_New[0, 0])
if rule is not None:
    rank, boost = rule
    predicted_class = sorted_indices[rank]
    if boost is not None:
        probabilities[0, predicted_class] += boost

# Adjusted probability of the selected class
highest_probability = probabilities[0, predicted_class]

# Report the selection
print(f"Predicted Class: {predicted_class}")
print(f"Highest Probability: {highest_probability:.2%}")
# Optional: dump every class probability
# print("All Probabilities:")
# for class_idx, probability in enumerate(probabilities[0]):
#     print(f"Class {class_idx}: {probability:.2%}")

Predicted Class: 1
Highest Probability: 68.75%


In [15]:
import pickle
import numpy as np

def predict_travel_class(travel_type, travel_time, travel_duration, model=None):
    """Select a travel package class for a single traveller preference.

    The classifier's class probabilities are ranked, and the class at a rank
    determined by ``travel_type`` is selected; for most travel types a fixed
    amount is then added to that class's probability.

    Args:
        travel_type: Encoded travel type. Values 1-5 have a selection rule;
            any other value falls back to the most probable class.
        travel_time: Encoded travel time period.
        travel_duration: Travel duration in weeks.
        model: Optional, already-loaded classifier exposing ``predict_proba``.
            When omitted, the model is unpickled from 'TravelPack.pkl' in the
            working directory, as before. Passing a model avoids re-reading
            the file on every call.

    Returns:
        A ``(predicted_class, highest_probability)`` tuple.
        ``predicted_class`` is a column index into the ``predict_proba``
        output (NOTE(review): not necessarily the package label -- confirm
        against ``model.classes_``). ``highest_probability`` is the selected
        class's probability plus the rule's boost, so despite its name it is
        not necessarily the largest raw probability.

    Raises:
        IndexError: If the model reports fewer classes than the rank required
            by the rule for ``travel_type``.
    """
    if model is None:
        # NOTE(security): unpickling can execute arbitrary code; only load a
        # 'TravelPack.pkl' that comes from a trusted source.
        with open('TravelPack.pkl', 'rb') as model_file:
            model = pickle.load(model_file)

    # travel type -> (rank of the class to select, amount added to its probability)
    selection_rules = {
        2: (2, 0.5),    # third largest probability class
        1: (3, 0.5),    # fourth largest probability class
        3: (1, 0.25),   # second largest probability class
        5: (1, 0.15),   # second largest probability class
        4: (0, 0.0),    # highest probability class, no boost
    }

    features = np.array([[travel_type, travel_time, travel_duration]])
    class_probabilities = model.predict_proba(features)[0]

    rule = selection_rules.get(features[0, 0])
    if rule is None:
        # Unknown travel type: keep the plain most-probable class.
        predicted_class = np.argmax(class_probabilities)
        return predicted_class, class_probabilities[predicted_class]

    rank, boost = rule
    # Class indices ordered from most to least probable
    ranked_classes = np.argsort(class_probabilities)[::-1]
    predicted_class = ranked_classes[rank]
    highest_probability = class_probabilities[predicted_class] + boost

    return predicted_class, highest_probability


# Example usage: travel type 5, time period 3, duration of 15 weeks
travel_type = 5
travel_time = 3
travel_duration = 15
predicted_class, highest_probability = predict_travel_class(
    travel_type=travel_type,
    travel_time=travel_time,
    travel_duration=travel_duration,
)
print(f"Predicted Class: {predicted_class}")
print(f"Highest Probability: {highest_probability:.2%}")


Predicted Class: 3
Highest Probability: 46.25%


See the scikit-learn notes on the security and maintainability limitations of pickled models: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
