<h1>Generating Pakistan Tourism Data</h1>

<h2>Importing Required Libraries</h2>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

<h2>Parameters for data generation</h2>

In [3]:

# Seed NumPy's global random generator so the random draws made after this
# point are repeatable from a fresh run.
np.random.seed(2024)

# Dimensions of the dataset.
years = list(range(2021, 2024))
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
cities = [
    'Islamabad',
    'Lahore',
    'Karachi',
    'Murree',
    'Hunza',
    'Skardu',
    'Swat',
    'Gilgit',
    'Naran',
    'Chitral',
]

# Categorical options available for each record.
tourist_types = ['Domestic', 'International']
accommodation_types = ['Hotel', 'Guest House', 'Camping', 'Airbnb']
transport_modes = ['Car', 'Bus', 'Plane', 'Train']

# Season name -> the three month labels it covers (Winter spans the year end).
seasons = {
    'Winter': ['Dec', 'Jan', 'Feb'],
    'Spring': ['Mar', 'Apr', 'May'],
    'Summer': ['Jun', 'Jul', 'Aug'],
    'Autumn': ['Sep', 'Oct', 'Nov'],
}

# Season name -> weather labels that may occur in that season.
weather_conditions = {
    'Winter': ['Snowy', 'Cloudy', 'Rainy'],
    'Spring': ['Sunny', 'Cloudy', 'Rainy'],
    'Summer': ['Sunny', 'Cloudy', 'Rainy'],
    'Autumn': ['Cloudy', 'Rainy', 'Sunny'],
}

# Month label -> festival/event assumed to affect tourism in that month.
festivals = {
    'Dec': 'Winter Festival',
    'Mar': 'Pakistan Day',
    'Aug': 'Independence Day',
    'Jun': 'Summer Vacation Peak',
}

<h2>Helper functions and data generation</h2>

In [6]:

def get_season(month, season_map=None):
    """Return the name of the season that contains ``month``.

    Parameters
    ----------
    month : str
        Month label to look up (e.g. ``'Jan'``).
    season_map : dict, optional
        Mapping of season name to a collection of month labels. When omitted,
        the module-level ``seasons`` mapping is used.

    Returns
    -------
    str
        The first season, in mapping order, whose months include ``month``;
        ``'Unknown'`` when no season lists it.
    """
    if season_map is None:
        season_map = seasons
    # The loop variable is deliberately not called ``months`` so it cannot be
    # confused with the module-level list of month labels.
    for season, season_months in season_map.items():
        if month in season_months:
            return season
    return 'Unknown'

def get_weather(season, conditions=None):
    """Pick one weather label at random for ``season``.

    The pick is made with ``np.random.choice`` on NumPy's global random
    generator, so the result depends on the current seed/state.

    Parameters
    ----------
    season : str
        Season name used as the lookup key.
    conditions : dict, optional
        Mapping of season name to a sequence of weather labels. When omitted,
        the module-level ``weather_conditions`` mapping is used.

    Returns
    -------
    str
        One of the labels listed for ``season`` (as returned by
        ``np.random.choice``).

    Raises
    ------
    KeyError
        If ``season`` has no entry in the mapping (for example the
        ``'Unknown'`` value that ``get_season`` can return).
    """
    if conditions is None:
        conditions = weather_conditions
    return np.random.choice(conditions[season])

def has_festival(month, festival_map=None):
    """Tell whether a festival/event is registered for ``month``.

    Parameters
    ----------
    month : str
        Month label to look up (e.g. ``'Aug'``).
    festival_map : dict, optional
        Mapping of month label to festival name. When omitted, the
        module-level ``festivals`` mapping is used.

    Returns
    -------
    bool
        ``True`` when the mapping holds a non-``None`` value for ``month``,
        ``False`` otherwise (missing key or a ``None`` value).
    """
    if festival_map is None:
        festival_map = festivals
    return festival_map.get(month) is not None

# Accumulator for the generated rows. This is a plain Python list of row-lists,
# not a DataFrame.
data = []

# Generate one synthetic record per (year, month, city) combination.
# NOTE: every draw below uses NumPy's global RNG (np.random.*), so the values
# depend on the RNG state when this code runs and on the exact order of the
# draws. Re-running this code without re-running np.random.seed(...) first
# produces different data.
for year in years:
    for month in months:
        for city in cities:
            # Categorical attributes, each chosen at random from its option list.
            tourist_type = np.random.choice(tourist_types)
            accommodation = np.random.choice(accommodation_types)
            transport = np.random.choice(transport_modes)
            
            # 'Yes' when the month has an entry in the festivals mapping, else 'No'.
            festival_influence = 'Yes' if has_festival(month) else 'No'
            
            # Season is derived from the month; weather is drawn at random for that season.
            season = get_season(month)
            weather = get_weather(season)
            
            # Tourist numbers depend on tourist type, season and festivals.
            # (np.random.randint excludes its upper bound.)
            if tourist_type == 'Domestic':
                base_tourists = np.random.randint(500, 2500)
                # Domestic visitors get a summer bonus ...
                if season == 'Summer':
                    base_tourists += np.random.randint(200, 1000)
                # ... and a further bonus in festival months.
                if festival_influence == 'Yes':
                    base_tourists += np.random.randint(500, 1500)
            else:
                # Any non-'Domestic' type (i.e. 'International') starts from a lower base.
                base_tourists = np.random.randint(200, 1200)
                # Winter bonus applies only to these three cities.
                if season == 'Winter' and city in ['Murree', 'Hunza', 'Skardu']:
                    base_tourists += np.random.randint(300, 800)

            # Expenditure range depends on tourist type only (city is not used here).
            if tourist_type == 'Domestic':
                expenditure = np.random.randint(4000, 18000)
            else:
                expenditure = np.random.randint(15000, 70000)
            
            # Extra spending only when the weather is Sunny AND the month has a festival.
            if weather == 'Sunny' and festival_influence == 'Yes':
                expenditure += np.random.randint(2000, 5000)

            # Average stay is drawn independently; revenue is tourists x expenditure
            # and does not take the stay duration into account.
            average_stay = np.random.randint(2, 12)  # Stay duration in days (2-11, upper bound excluded)
            revenue = base_tourists * expenditure
            
            # Satisfaction rating: 3-5 when Sunny, otherwise 1-4 (a 5 is only possible when Sunny).
            satisfaction = np.random.randint(3, 6) if weather == 'Sunny' else np.random.randint(1, 5)
            
            # Store the record; the value order here defines the column order of each row.
            data.append([
                year, month, city, tourist_type, base_tourists, expenditure, accommodation, 
                average_stay, revenue, season, weather, festival_influence, transport, satisfaction
            ])



<h2>Creating the DataFrame</h2>

In [31]:
# Column labels, one per value stored in each generated row (same order).
columns = [
    'Year',
    'Month',
    'City',
    'Tourist Type',
    'Number of Tourists',
    'Tourist Expenditure (PKR)',
    'Accommodation Type',
    'Average Stay Duration (Days)',
    'Revenue Generated (PKR)',
    'Season',
    'Weather Condition',
    'Festival/Event Influence',
    'Transport Mode Preference',
    'Tourist Satisfaction Rating',
]

# Build the table from the generated rows, save it as CSV without the index
# column, then preview the first five rows.
df = pd.DataFrame(data, columns=columns)
df.to_csv('pakistan_tourism_data.csv', index=False)
df.head()

Unnamed: 0,Year,Month,City,Tourist Type,Number of Tourists,Tourist Expenditure (PKR),Accommodation Type,Average Stay Duration (Days),Revenue Generated (PKR),Season,Weather Condition,Festival/Event Influence,Transport Mode Preference,Tourist Satisfaction Rating
0,2021,Jan,Islamabad,International,963,42254,Airbnb,6,40690602,Winter,Cloudy,No,Bus,2
1,2021,Jan,Lahore,International,814,22280,Hotel,6,18135920,Winter,Snowy,No,Train,2
2,2021,Jan,Karachi,Domestic,2204,17582,Camping,2,38750728,Winter,Cloudy,No,Train,3
3,2021,Jan,Murree,Domestic,1648,4145,Airbnb,6,6830960,Winter,Cloudy,No,Train,1
4,2021,Jan,Hunza,Domestic,2048,8967,Airbnb,10,18364416,Winter,Cloudy,No,Bus,2
