In [2]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
import os

# Folder on the mounted Drive that holds the CSV files used in this notebook.
data_path = "/content/drive/MyDrive/lab 4 data dm/"

# Keep only the names ending in ".csv"; the order is whatever os.listdir() reports.
files = list(filter(lambda name: name.endswith(".csv"), os.listdir(data_path)))
print("CSV files found:", files)

CSV files found: ['business_recorder(2020-2023).csv', 'tribune(full-data).csv', 'daily_times(full-data).csv', 'pakistan_today(full-data).csv', 'dawn (full-data).csv']


In [2]:
# Collect every calendar year that appears in the 'date' column of any CSV.
years_found = set()
for file in files:
    print(f"Scanning {file}...")
    file_path = os.path.join(data_path, file)
    # Read only the 'date' column, 50,000 rows at a time, instead of loading whole files.
    for chunk in pd.read_csv(file_path, usecols=['date'], encoding="latin-1", chunksize=50000):
        # Values that cannot be parsed as dates become NaT and are dropped below.
        chunk['date'] = pd.to_datetime(chunk['date'], errors='coerce')
        chunk_years = chunk['date'].dt.year.dropna().unique()
        years_found.update(chunk_years)

# Convert the collected values to plain ints and order them for display.
years_found = sorted(int(y) for y in years_found if pd.notna(y))
print("\nYears found in dataset:", years_found)

Scanning business_recorder(2020-2023).csv...
Scanning tribune(full-data).csv...
Scanning daily_times(full-data).csv...
Scanning pakistan_today(full-data).csv...
Scanning dawn (full-data).csv...

Years found in dataset: [2020, 2021, 2022, 2023]


## **Pakistan Today**

In [3]:
# Select the Pakistan Today export by file name rather than by list position.
# os.listdir() returns entries in arbitrary order, so a positional slice such
# as files[3:4] can silently pick a different newspaper in another session.
selected_files = [f for f in files if f.lower().startswith("pakistan_today")]
print("Selected files:", selected_files)

# Maps file stem (file name without its extension) -> cleaned DataFrame.
dataframes = {}

for file in selected_files:
    file_path = os.path.join(data_path, file)
    df_name = os.path.splitext(file)[0]
    try:
        # load CSV
        # NOTE(review): the preview output shows mojibake such as 'â' and 'Â',
        # which suggests the file may really be UTF-8 rather than latin-1.
        # Confirm before changing the encoding, since other cells may match on
        # the text exactly as it is decoded here.
        df = pd.read_csv(file_path, encoding="latin-1", index_col=False)

        # remove unnamed columns (e.g. a saved index column named 'Unnamed: 0')
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # store cleaned dataframe
        dataframes[df_name] = df
        print(f"Loaded {file} (cleaned unnamed columns)")
    except Exception as e:
        # Best effort: report the failure and continue with the remaining files.
        print(f"Error loading {file}: {e}")

# display summary: first rows and column names of every loaded frame
for name, df in dataframes.items():
    print(f"\nDataFrame: {name}")
    display(df.head())
    print(f"Columns: {df.columns.tolist()}")


Selected files: ['pakistan_today(full-data).csv']
Loaded pakistan_today(full-data).csv (cleaned unnamed columns)

DataFrame: pakistan_today(full-data)


Unnamed: 0,headline,date,link,source,categories,description
0,FATF pressure leads banks to educate customers...,1/1/2020,https://profit.pakistantoday.com.pk/2020/01/01...,Pakistan Today,Business,\n\nOne of the leading commercial banks in Pak...
1,"Amid delays, Diamer-Bhashaâs PC-1 cost surge...",1/1/2020,https://profit.pakistantoday.com.pk/2020/01/01...,Pakistan Today,Business,WAPDA yet to acquire land for the dam's site d...
2,Govt finally allows used car importers to pay ...,1/1/2020,https://profit.pakistantoday.com.pk/2020/01/01...,Pakistan Today,Business,\n\nISLAMABAD:Â The federal government has pe...
3,December inflation clocks in at 12.63pc,1/1/2020,https://profit.pakistantoday.com.pk/2020/01/01...,Pakistan Today,Business,\n\nISLAMABAD:Â Inflation rose to 12.63 per c...
4,NEPRA notifies Rs1.56 per unit hike in power t...,1/1/2020,https://profit.pakistantoday.com.pk/2020/01/01...,Pakistan Today,Business,\n\nISLAMABAD:Â The National Electric Power R...


Columns: ['headline', 'date', 'link', 'source', 'categories', 'description']


In [4]:
# Gather the distinct category labels found across every loaded DataFrame.
all_unique_categories = set()

for df_name, df in dataframes.items():
    if 'categories' not in df.columns:
        continue
    # NOTE: this overwrites the column in place with lower-cased strings, and
    # astype(str) turns missing values into the literal text 'nan' (so the
    # dropna() below has nothing left to drop; 'nan' is filtered out by name).
    df['categories'] = df['categories'].astype(str).str.lower()
    for categories_str in df['categories'].dropna().unique():
        # Split on commas, trim whitespace, and skip empty / 'nan' pieces.
        labels = (part.strip() for part in categories_str.split(','))
        all_unique_categories.update(
            label for label in labels if label and label != 'nan'
        )

print("Unique Categories:")
for category in sorted(all_unique_categories):
    print(f"- {category}")

Unique Categories:
- agriculture & national
- analysis & e-papers & pakistan today
- analysis & headlines
- analysis & headlines & national & top headlines
- analysis & national
- book review
- book review & e-papers & pakistan today
- book review & national
- business
- cartoon & editorials
- cartoon & letters
- cartoon & opinion
- city
- city & education & multan
- city & featured & lahore
- city & headlines
- city & headlines & lahore
- city & headlines & national
- city & islamabad
- city & islamabad & national
- city & karachi
- city & karachi & national
- city & lahore
- city & lahore & national
- city & lahore & opinion
- city & national
- city & peshawar
- comment
- comment & editorials
- comment & headlines & opinion
- comment & letters
- comment & national & opinion
- comment & opinion
- e-papers
- e-papers & pakistan today
- e-papers & profit magazine
- editorials
- editorials & headlines & opinion
- editorials & letters & opinion
- editorials & opinion
- education & nationa

In [5]:
# Target labels that the fine-grained source categories are collapsed into.
broad_categories = [
    'Business',
    'Politics',
    'National',
    'Sports',
    'Opinion',
    'Entertainment',
    'Technology',
    'Health',
    'Food',
    'World',
    'Other',
]

category_mapping = {
    'Business': 'Business', 'Pakistan, Business': 'Business', 'Business, World': 'Business',
    'Balochistan, Business': 'Business', 'Punjab, Business': 'Business', 'Business, Technology': 'Business',
    'Business, K-P': 'Business', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business', 'Pakistan, Business, Islamabad': 'Business',
    'GOVERNANCE': 'Politics', 'GOVERNANCE & HEADLINES': 'Politics',
    'GOVERNANCE & HEADLINES & Top Headlines': 'Politics', 'GOVERNANCE & NATIONAL': 'Politics',
    'GOVERNANCE & HEADLINES & NATIONAL': 'Politics', 'POLITICS': 'Politics',
    'POLITICS, Pakistan': 'Politics', 'POLITICS, World': 'Politics', 'Pakistan, POLITICS': 'Politics',
    'NATIONAL': 'National', 'ISLAMABAD': 'National', 'KARACHI': 'National', 'LAHORE': 'National',
    'PESHAWAR': 'National', 'MULTAN': 'National', 'CITY': 'National', 'Rawalpindi': 'National',
    'Pakistan': 'National', 'Punjab': 'National', 'Sindh': 'National', 'K-P': 'National',
    'Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Azad Jammu & Kashmir': 'National',
    'Sports': 'Sports', 'Sports & Top Headlines': 'Sports', 'Sports & World': 'Sports',
    'Sports, K-P': 'Sports', 'Sports, Pakistan': 'Sports', 'Sports, TV': 'Sports',
    'Sports, Hockey': 'Sports', 'Sports, Cricket': 'Sports', 'Football': 'Sports', 'Tennis': 'Sports',
    'Sports, Punjab': 'Sports', 'Sports, Life & Style': 'Sports', 'Sports, Multan, Cities': 'Sports',
    'Editorials': 'Opinion', 'Editorials & Letters & Opinion': 'Opinion', 'Comment': 'Opinion',
    'Comment & Opinion': 'Opinion', 'Letters': 'Opinion', 'Opinion': 'Opinion', 'Cartoon & Opinion': 'Opinion',
    'Comment & HEADLINES & Opinion': 'Opinion', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'Entertainment': 'Entertainment', 'Life & Style': 'Entertainment', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Music': 'Entertainment', 'Life & Style, TV': 'Entertainment', 'Life & Style, Gossip': 'Entertainment',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Food': 'Entertainment',
    'Life & Style, Art and Books': 'Entertainment', 'Life & Style, Theatre': 'Entertainment',
    'Life & Style, Spotlight': 'Entertainment', 'Life & Style, Bollywood': 'Entertainment',
    'Life & Style, K-P': 'Entertainment', 'Life & Style, Health': 'Entertainment',
    'Technology': 'Technology', 'World, Technology': 'Technology', 'Technology, Business': 'Technology',
    'Technology, Sports': 'Technology', 'Technology, Food': 'Technology', 'Technology, Games': 'Technology',
    'Technology, Life & Style': 'Technology', 'Technology, Pakistan': 'Technology',
    'Health': 'Health', 'Pakistan, Health': 'Health', 'Sindh, Health': 'Health',
    'Jammu & Kashmir, Health': 'Health', 'Health, ADVICE': 'Health', 'Food, ADVICE, Health': 'Health',
    'Health, Life & Style': 'Health', 'Health, Latest': 'Health', 'Food': 'Food', 'Food, Spotlight': 'Food',
    'Technology, Food': 'Food',
    'World': 'World', 'Pakistan, World': 'World', 'World, Jammu & Kashmir': 'World', 'World, Sports': 'World',
    'World, Gilgit Baltistan': 'World', 'World, Life & Style': 'World', 'World, Music': 'World',
    'World, Fashion': 'World', 'World, Health': 'World', 'World, K-P': 'World', 'World, archives': 'World',
    'World, Videos': 'World', 'World, Opinion': 'World', 'World, Food, Technology': 'World', 'World, Bollywood': 'World',
    'World, Newslab': 'World', 'Pakistan, World, Islamabad': 'World', 'Pakistan, Islamabad, World, Cities': 'World',
    'Pakistan, World, Balochistan': 'World', 'Pakistan, Azad Jammu & Kashmir, World': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World',

    'Uncategorized': 'Other', 'SPONSORED': 'Other', 'Sponsored Content': 'Other',
    'NATIVE CONTENT': 'Other', 'Archives': 'Other', 'Slideshows, World': 'Other', 'Latest': 'Other',
    'Multimedia': 'Other', 'T.Edit': 'Other', 'NATIVE CONTENT & Pakistan Today': 'Other',
    'HEADLINES': 'Other', 'HEADLINES & Top Headlines': 'Other', 'FEATURED & top Featured': 'Other',
    'FEATURED & Top Non Business': 'Other', 'FEATURED': 'Other', 'E-papers & Profit Magazine': 'Other',
    'E-papers & Pakistan Today': 'Other', 'Top Headlines': 'Other', 'NATIONAL & Top Non Business': 'Other',
    'Editorials & Opinion': 'Opinion', 'Letters & Opinion': 'Opinion', 'NATIONAL & Top Headlines': 'Other',
    'NATIONAL & World': 'World', 'Top Non Business & World': 'World', 'LAHORE & NATIONAL': 'National',
    'Analysis & HEADLINES': 'Other', 'HEADLINES & NATIONAL': 'Other', 'CITY & LAHORE': 'National',
    'HEADLINES & NATIONAL & Top Headlines': 'Other', 'Top Headlines & World': 'World', 'KARACHI & NATIONAL': 'National',
    'NATIONAL & PESHAWAR': 'National', 'HEADLINES & KARACHI': 'Other', 'HEADLINES & LAHORE': 'Other',
    'HEADLINES & PESHAWAR': 'Other', 'INTERVIEW & Top Headlines': 'Other', 'ISLAMABAD & NATIONAL': 'National',
    'CITY & HEADLINES & LAHORE': 'Other', 'HEADLINES & Sports': 'Sports', 'KARACHI & LAHORE & NATIONAL': 'National',
    'KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Non Business': 'Other', 'FEATURED & Top Headlines': 'Other',
    'NATIONAL & Sports': 'Sports', 'KARACHI & NATIONAL & Top Headlines': 'National', 'CITY & ISLAMABAD': 'National',
    'HEADLINES & Top Headlines & World': 'World', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'HEADLINES & LAHORE & NATIONAL': 'Other', 'Analysis & HEADLINES & NATIONAL & Top Headlines': 'Other',
    'Book Review': 'Other', 'LAHORE & NATIONAL & Top Headlines': 'National',
    'HEADLINES & LAHORE & NATIONAL & Top Headlines': 'Other', 'CITY & KARACHI & NATIONAL': 'National',
    'LAHORE & NATIONAL & PESHAWAR': 'National', 'Comment & Editorials': 'Opinion',
    'HEADLINES & Sports & Top Headlines': 'Sports', 'Book Review & E-papers & Pakistan Today': 'Other',
    'CITY & HEADLINES & NATIONAL': 'Other', 'CITY & NATIONAL': 'National', 'FEATURED & NATIONAL': 'Other',
    'ISLAMABAD & KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Headlines & World': 'World',
    'LAHORE & Top Headlines': 'Other', 'HEADLINES & ISLAMABAD & NATIONAL': 'Other',
    'HEADLINES & NATIONAL & Sports & Top Headlines': 'Sports', 'NATIONAL & Sports & Top Headlines': 'Sports',
    'Analysis & E-papers & Pakistan Today': 'Other', 'CITY & HEADLINES': 'Other', 'Sports & World': 'Sports',
    'HEADLINES & ISLAMABAD & LAHORE & NATIONAL': 'Other', 'CITY & FEATURED & LAHORE': 'Other',
    'NATIONAL & Opinion': 'Opinion', 'Entertainment & World': 'Entertainment',
    'HEADLINES & ISLAMABAD & NATIONAL & Pakistan Today': 'Other',
    'NATIONAL & Sports & Top Headlines & Top Non Business': 'Sports', 'HEADLINES & KARACHI & NATIONAL': 'Other',
    'OIC & World': 'World', 'Agriculture & NATIONAL': 'National', 'NATIONAL & top Featured': 'Other',
    'CITY & Education & MULTAN': 'National', 'Cartoon & Letters': 'Opinion', 'FEATURED & HEADLINES & NATIONAL': 'Other',
    'FEATURED & Sports': 'Sports', 'Comment & Letters': 'Opinion', 'ISLAMABAD & SPONSORED': 'Other',
    'top Featured & World': 'World', 'HEADLINES & NATIONAL & top Featured': 'Other',
    'NATIONAL & Top Headlines & World': 'World', 'NATIONAL & top Featured & World': 'World',
    'MULTAN & NATIONAL': 'National', 'NATIONAL & top Featured & Top Headlines': 'Other',
    'NATIONAL & PESHAWAR & Top Headlines': 'National', 'CITY & PESHAWAR': 'National',
    'HEADLINES & Letters & Opinion': 'Opinion', 'CITY & LAHORE & Opinion': 'Opinion',
    'ISLAMABAD & Pakistan Today': 'Other', 'top Featured & Top Headlines': 'Other', 'top Featured': 'Other',
    'PESHAWAR & Sports': 'Sports', 'Sports & top Featured': 'Sports', 'E-papers': 'Other',
    'CITY & ISLAMABAD & NATIONAL': 'National', 'Comment & NATIONAL & Opinion': 'Opinion',
    'Book Review & NATIONAL': 'Other', 'Entertainment & NATIONAL': 'Entertainment', 'Analysis & NATIONAL': 'Other',
    'NATIONAL & Pakistan Today': 'Other', 'Cartoon & Editorials': 'Opinion', 'CITY & LAHORE & NATIONAL': 'National',
    'Education & NATIONAL': 'National', 'NATIVE CONTENT & Pakistan Today': 'Other',

    'Pakistan, Punjab': 'National', 'Pakistan, K-P': 'National', 'Life & Style, Film, Gossip': 'Entertainment',
    'Life & Style, TV': 'Entertainment', 'Sindh': 'National', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Gossip': 'Entertainment', 'Life & Style, Music': 'Entertainment', 'Punjab': 'National', 'K-P': 'National',
    'Editorial': 'Opinion', 'Balochistan': 'National', 'Sindh, Health': 'Health', 'Pakistan, Life & Style': 'Entertainment',
    'Jammu & Kashmir, Health': 'Health', 'Pakistan, Sindh': 'National', 'K-P, Music': 'Entertainment',
    'Life & Style': 'Entertainment', 'Pakistan, Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Jammu & Kashmir': 'National',
    'Life & Style, Art and Books, Music': 'Entertainment', 'Life & Style, Fashion, Gossip': 'Entertainment',
    'Life & Style, Music, Gossip': 'Entertainment', 'Pakistan, Jammu & Kashmir': 'National', 'Sindh, Punjab': 'National',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Film, TV': 'Entertainment', 'Balochistan, Business': 'Business',
    'Life & Style, Health': 'Health', 'Punjab, Business': 'Business', 'Music, Film': 'Entertainment', 'TV': 'Entertainment',
    'Life & Style, Music, Food': 'Entertainment', 'Pakistan, Health': 'Health', 'Balochistan, K-P': 'National',
    'Sindh, Technology': 'Technology', 'Film': 'Entertainment', 'Sindh, Life & Style, Music': 'Entertainment',
    'Life & Style, Gossip, TV': 'Entertainment', 'Life & Style, Art and Books': 'Entertainment', 'K-P, Technology': 'Technology',
    'Magazine': 'Other', 'Film, Gossip': 'Entertainment', 'Life & Style, Theatre': 'Entertainment', 'Business, Technology': 'Business',
    'Balochistan, Gilgit Baltistan': 'National', 'K-P, Health': 'Health', 'Pakistan, Gilgit Baltistan': 'National',
    'Life & Style, Film, Fashion': 'Entertainment', 'Fashion': 'Entertainment', 'Punjab, World': 'World',
    'Pakistan, Sports': 'Sports', 'Pakistan, Technology': 'Technology', 'Balochistan, Health': 'Health',
    'Pakistan, Sindh, Art and Books': 'Entertainment', 'Life & Style, Fashion, TV': 'Entertainment', 'TV, Theatre': 'Entertainment',
    'Life & Style, Food': 'Food', 'Pakistan, Film': 'Entertainment', 'Health': 'Health', 'World, K-P': 'World',
    'Life & Style, K-P': 'Entertainment', 'Art and Books': 'Entertainment', 'Opinion, Health': 'Opinion',
    'Life & Style, Music, TV': 'Entertainment', 'Sports, Life & Style': 'Sports', 'Sindh, Jammu & Kashmir': 'National',
    'Sindh, Business': 'Business', 'Life & Style, Film, Theatre': 'Entertainment', 'Sindh, Sports': 'Sports',
    'archives': 'Other', 'Music, Health': 'Health', 'Punjab, Technology': 'Technology', 'Punjab, Health': 'Health',
    'Life & Style, Food, Gossip': 'Entertainment', 'Life & Style, Food, Health': 'Health', 'Sports, K-P': 'Sports',
    'Sindh, Life & Style': 'Entertainment', 'Life & Style, Music, Theatre': 'Entertainment', 'Sports, Videos': 'Sports',
    'Opinion, Technology': 'Opinion', 'Sindh, World': 'World', 'Pakistan, K-P, Health': 'Health',
    'Pakistan, Sindh, Punjab': 'National', 'Life & Style, Opinion': 'Opinion', 'Balochistan, Life & Style': 'Entertainment',
    'Life & Style, Art and Books, Film': 'Entertainment', 'Pakistan, Opinion': 'Opinion', 'Punjab, Jammu & Kashmir': 'National',
    'Punjab, Sports': 'Sports', 'Life & Style, Technology': 'Technology', 'Gossip': 'Entertainment',
    'Life & Style, Music, Film': 'Entertainment', 'Life & Style, Health, TV': 'Health', 'Technology, Games': 'Technology',
    'Pakistan, World, Jammu & Kashmir': 'World', 'Life & Style, Music, Fashion': 'Entertainment',
    'Life & Style, Art and Books, Health': 'Health', 'Sindh, Videos': 'Other', 'Punjab, Food': 'Food',
    'Life & Style, Film, Health': 'Health', 'Sports, Multan, Cities': 'Sports', 'Music, Fashion': 'Entertainment',
    'Videos': 'Other', 'K-P, Art and Books': 'Entertainment', 'Music, Gossip': 'Entertainment',
    'Business, Jammu & Kashmir': 'Business', 'Sindh, Balochistan': 'National', 'Opinion, Editorial': 'Opinion',
    'Pakistan, archives': 'Other', 'Jammu & Kashmir, Gilgit Baltistan': 'National', 'Punjab, K-P': 'National',
    'Business, K-P': 'Business', 'Life & Style, Fashion, Health': 'Health', 'World, Azad Jammu & Kashmir': 'World',
    'Life &amp; Style, TV': 'Entertainment', 'Sindh, Business, Health': 'Business', 'Sports, Business': 'Sports',
    'Punjab, Business, Lahore, Cities': 'Business', 'Punjab, Life & Style, Gossip': 'Entertainment',
    'Life & Style, Art and Books, TV': 'Entertainment', 'Sindh, Karachi, Cities': 'National', 'Sindh, archives': 'Other',
    'Pakistan, Islamabad': 'National', 'life and style': 'Entertainment', 'life and style, Music': 'Entertainment',
    'Islamabad': 'National', 'Pakistan, Health, Food': 'Health', 'Pakistan, Khyber-Pakhtunkhwa': 'National',
    'Music, Film, Theatre': 'Entertainment', 'Music': 'Entertainment', 'Cricket': 'Sports',
    'Film, Gossip, Bollywood': 'Entertainment', 'Khyber-Pakhtunkhwa': 'National', 'Sindh, Karachi': 'National',
    'Khyber-Pakhtunkhwa, Pakistan': 'National', 'World, Azad Jammu & Kashmir, Cities': 'World',
    'Pakistan, Islamabad, Cities': 'National', 'Gossip, TV, Music': 'Entertainment',
    'Music, Pakistan, Life & Style': 'Entertainment', 'Bollywood, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir': 'National', 'Art and Books, Film, Games': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Cities': 'National', 'Sindh, Hyderabad': 'National', 'Sindh, Cities': 'National',
    'Pakistan, Sindh, Cities': 'National', 'Art and Books, Film': 'Entertainment', 'Pakistan, Lahore': 'National',
    'Sports, TV, Gossip': 'Sports', 'Punjab, Pakistan, Lahore': 'National', 'Games': 'Other',
    'Khyber-Pakhtunkhwa, Swat': 'National', 'Pakistan, Sindh, Karachi, Business': 'Business',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National', 'Football': 'Sports', 'Pakistan, Peshawar': 'National',
    'TV, Sports': 'Sports', 'Khyber-Pakhtunkhwa, Cities, Peshawar': 'National', 'Rawalpindi': 'National',
    'Slideshows, World': 'World', 'Pakistan, Azad Jammu & Kashmir, Cities': 'National',
    'Pakistan, Cities, Khyber Pakhtunkhwa': 'National', 'Gossip, Film': 'Entertainment', 'Health, ADVICE': 'Health',
    'TV, Film': 'Entertainment', 'Pakistan, Lahore, Cities': 'National', 'Pakistan, Gwadar, Cities': 'National',
    'Rawalpindi, Punjab': 'National', 'Punjab, Rawalpindi': 'National', 'Khyber-Pakhtunkhwa, Peshawar': 'National',
    'Punjab, Lahore': 'National', 'Pakistan, Balochistan, Cities': 'National', 'Sindh, khairpur': 'National',
    'Islamabad, Pakistan, Cities': 'National', 'Pakistan, Khyber-Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Karachi': 'National', 'Khyber-Pakhtunkhwa, Abbottabad': 'National', 'TV, Gossip': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Gilgit-Baltistan': 'National', 'World, Pakistan': 'World',
    'Khyber-Pakhtunkhwa, Cities': 'National', 'Fashion, Gossip': 'Entertainment', 'Islamabad, World': 'World',
    'Punjab, Multan': 'National', 'Punjab, Faisalabad': 'National', 'Pakistan, Islamabad, Sindh': 'National',
    'Pakistan, Karachi, Cities': 'National', 'Islamabad, Business': 'Business', 'Sindh, Pakistan': 'National',
    'Pakistan, Islamabad, Punjab': 'National', 'Pakistan, Sindh, Karachi, Cities': 'National',
    'Pakistan, Cities, Azad Jammu & Kashmir': 'National', 'Health, Life & Style, ADVICE': 'Health',
    'Islamabad, Balochistan': 'National', 'Pakistan, Gilgit-Baltistan': 'National', 'Pakistan, Punjab, Cities': 'National',
    'Sindh, tharparkar': 'National', 'Business, Gilgit-Baltistan': 'Business', 'Khyber Pakhtunkhwa': 'National',
    'Film, TV, Life & Style, life and style': 'Entertainment', 'TV, Film, Life & Style': 'Entertainment',
    'Film, Life & Style': 'Entertainment', 'Pakistan, Cities, Lahore': 'National',
    'Life & Style, Gossip, Film': 'Entertainment', 'Film, Art and Books': 'Entertainment', 'Food, ADVICE, Health': 'Health',
    'Gossip, Fashion': 'Entertainment', 'Gossip, Life & Style': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Mardan': 'National', 'Pakistan, Punjab, Lahore': 'National',
    'Life & Style, Gossip, Fashion': 'Entertainment', 'Pakistan, Cities': 'National',
    'Islamabad, Rawalpindi': 'National', 'Islamabad, Pakistan': 'National', 'Pakistan, Islamabad, World': 'World',
    'Pakistan, Sindh, Nawabshah': 'National', 'Balochistan, Islamabad, Pakistan': 'National', 'Fashion, Life & Style': 'Entertainment',
    'Health, ADVICE, Life & Style': 'Health', 'Music, Life & Style': 'Entertainment',
    'Pakistan, Peshawar, Islamabad, Cities': 'National', 'Punjab, Gilgit-Baltistan': 'National', 'Health, World': 'Health',
    'Gossip, Life & Style, Music': 'Entertainment', 'Gossip, Music': 'Entertainment', 'Gossip, Film, TV': 'Entertainment',
    'Technology, Life & Style': 'Technology', 'Sindh, Pakistan, Karachi, Cities': 'National',
    'Gossip, TV, Film, Life & Style': 'Entertainment', 'Pakistan, World, Islamabad, Cities': 'World',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'Sindh, Nawabshah': 'National', 'Azad Jammu & Kashmir, Pakistan': 'National',
    'TV, Gossip, Life & Style': 'Entertainment', 'Film, TV, Life & Style': 'Entertainment',
    'Khyber Pakhtunkhwa, Peshawar, Sports': 'Sports', 'Film, Gossip, Life & Style': 'Entertainment',
    'Rawalpindi, Islamabad': 'National', 'Sindh, sukkur': 'National', 'Pakistan, World, Islamabad': 'World',
    'Pakistan, Sindh, Karachi, Islamabad': 'National', 'Business, Pakistan': 'Business',
    'ADVICE, Life & Style, Film': 'Entertainment', 'Film, Life & Style, TV, Gossip': 'Entertainment', 'Music, TV': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Islamabad': 'National', 'Music, Gossip, Life & Style': 'Entertainment',
    'Islamabad, Cities, Pakistan': 'National', 'Islamabad, Punjab': 'National', 'Health, Life & Style': 'Health',
    'Technology, World': 'Technology', 'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Hyderabad': 'National', 'Life & Style, TV, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'ADVICE, Life & Style': 'Health', 'Pakistan, Sindh, Balochistan': 'National',
    'Gossip, Life & Style, TV': 'Entertainment', 'Music, Film, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Islamabad': 'National', 'Pakistan, World, Azad Jammu & Kashmir': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World', 'Rawalpindi, Pakistan, Business': 'Business',
    'Khyber Pakhtunkhwa, Peshawar': 'National', 'Sindh, dadu': 'National', 'Fashion, Gossip, Life & Style': 'Entertainment',
    'Pakistan, Karachi, Sindh': 'National', 'Pakistan, Balochistan, Quetta': 'National', 'TV, Life & Style': 'Entertainment',
    'Pakistan, Islamabad, World, Cities': 'World', 'Pakistan, Khyber Pakhtunkhwa, Cities': 'National',
    'Pakistan, Karachi, Sindh, Cities': 'National', 'Balochistan, Quetta': 'National', 'Pakistan, Rawalpindi': 'National',
    'Punjab, Film': 'Entertainment', 'Khyber Pakhtunkhwa, Swat': 'National', 'Balochistan, Gwadar': 'National',
    'TV, Life & Style, Gossip': 'Entertainment', 'Sindh, Karachi, Pakistan, Cities': 'National',
    'Pakistan, Sindh, Islamabad': 'National', 'Pakistan, Punjab, Islamabad, Cities': 'National',
    'Khyber Pakhtunkhwa, Nowshera': 'National', 'TV, Film, Gossip': 'Entertainment', 'Abbottabad': 'National',
    'Pakistan, Karachi, Peshawar': 'National', 'Pakistan, Khyber Pakhtunkhwa': 'National', 'Sports, Hockey': 'Sports',
    'Art and Books, Life & Style': 'Entertainment', 'Pakistan, World, Balochistan': 'World', 'Theatre': 'Entertainment',
    'Gossip, Film, Life & Style': 'Entertainment', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business',
    'Film, TV, Gossip, Life & Style': 'Entertainment', 'Life & Style, Sports': 'Sports',
    'Music, Art and Books, Life & Style, Film': 'Entertainment', 'Pakistan, Sindh, Karachi, Hyderabad, Cities': 'National',
    'Life & Style, ADVICE': 'Health',
    'Pakistan, Sindh, Punjab, Azad Jammu & Kashmir, Gilgit-Baltistan, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Karachi': 'National', 'Fashion, Music': 'Entertainment', 'Film, TV': 'Entertainment',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra, Nowshera, Charsadda': 'National', 'Fashion, Gossip, TV': 'Entertainment',
    'Karachi, Pakistan': 'National', 'Pakistan, Azad Jammu & Kashmir, Muzaffarabad': 'National',
    'Pakistan, Azad Jammu & Kashmir, World': 'World', 'Film, Music, Gossip': 'Entertainment',
    'Pakistan, Punjab, Rawalpindi': 'National', 'Pakistan, Lahore, Punjab': 'National', 'World, Newslab': 'World',
    'Karachi': 'National', 'Pakistan, Islamabad, Azad Jammu & Kashmir': 'National', 'Gossip, Fashion, Life & Style': 'Entertainment',
    'Pakistan, Rawalpindi, Karachi': 'National', 'Art and Books, Gossip, Life & Style': 'Entertainment',
    'Film, Theatre, Life & Style': 'Entertainment', 'Pakistan, Punjab, Islamabad': 'National',
    'Gilgit-Baltistan, Pakistan': 'National',
    'World, Business': 'Business',
    'Gilgit-Baltistan': 'National',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other'
}

# Build a case-insensitive lookup: the 'categories' column is lowercased below,
# so every key of category_mapping has to be lowercased to match it.
lowercase_category_mapping = {k.lower(): v for k, v in category_mapping.items()}

for name, df in dataframes.items():
    # A frame without a 'categories' column has nothing to map. Skip it rather
    # than letting the .map() call below fail with a KeyError.
    if 'categories' not in df.columns:
        print(f"Skipping {name}: no 'categories' column")
        continue

    # astype(str) turns missing values into the literal string 'nan'.
    df['categories'] = df['categories'].astype(str).str.lower()

    # Labels that have no entry in the mapping come out as NaN; they are
    # counted in the report loop below.
    df['mapped_categories'] = df['categories'].map(lowercase_category_mapping)

for name, df in dataframes.items():
    # Frames skipped above never received a 'mapped_categories' column.
    if 'mapped_categories' not in df.columns:
        continue

    unique_mapped_categories = df['mapped_categories'].unique()
    print(f"\nUnique mapped categories in {name}:")
    print(unique_mapped_categories)

    # A non-zero count means some raw label is missing from category_mapping.
    nan_count = df['mapped_categories'].isnull().sum()
    print(f"Number of NaN values in 'mapped_categories' in {name}: {nan_count}")


Unique mapped categories in pakistan_today(full-data):
['Business' 'Other' 'Politics' 'National' 'Opinion' 'World' 'Sports'
 'Entertainment']
Number of NaN values in 'mapped_categories' in pakistan_today(full-data): 0


In [None]:
# Filtered CSVs are written next to the input data.
output_dir = data_path
filtered_dataframes = {}

# Spaces and slashes in a frame name are replaced by underscores in file names.
_filename_table = str.maketrans({' ': '_', '/': '_'})

for name, df in dataframes.items():
    # Keep only the rows whose mapped category is National or World.
    keep_mask = df['mapped_categories'].isin(['National', 'World'])
    filtered_df = df.loc[keep_mask]
    filtered_dataframes[name + '_filtered'] = filtered_df

    # Output file: <sanitised frame name>_filtered.csv inside output_dir.
    output_filename = name.translate(_filename_table) + '_filtered.csv'
    output_file_path = os.path.join(output_dir, output_filename)
    filtered_df.to_csv(output_file_path, index=False)

    # Short report: where the file went, a preview, and the resulting shape.
    print(f"Filtered data saved to: {output_file_path}")
    print(f"\nFiltered DataFrame: {name}_filtered")
    display(filtered_df.head())
    print(f"Shape of filtered DataFrame: {filtered_df.shape}")

Filtered data saved to: pakistan_today.csv

Filtered DataFrame: Copy of Copy of pakistan_today(full-data)_filtered


Unnamed: 0,headline,date,link,source,categories,description,mapped_categories
6870,CJP to hear Karak temple attack case on Tuesda...,12/31/2020 14:36,https://www.pakistantoday.com.pk/2020/12/31/14...,Pakistan Today,national,ISLAMABAD: Chief Justice of Pakistan (CJP...,National
6872,NAB secures 14-day remand of Asif,12/31/2020 15:30,https://www.pakistantoday.com.pk/2020/12/31/na...,Pakistan Today,national,LAHORE: An accountability in Lahore on Thurs...,National
6877,PMC response sought in license case,12/31/2020 16:37,https://www.pakistantoday.com.pk/2020/12/31/pm...,Pakistan Today,national,ISLAMABAD: The Islamabad High Court (IHC) has...,National
6881,Saudi-led coalition strikes at Yemen capital a...,12/31/2020 16:47,https://www.pakistantoday.com.pk/2020/12/31/sa...,Pakistan Today,world,ADEN: Saudi-led coalition warplanes struck tar...,World
6884,"Asif, another PML-N leader asked to quit party...",12/31/2020 17:11,https://www.pakistantoday.com.pk/2020/12/31/as...,Pakistan Today,national,LAHORE: Former National Assembly (NA) speaker ...,National


Shape of filtered DataFrame: (27427, 7)


## **Daily Times**

In [6]:
# Pick the Daily Times file by name rather than by its position in `files`.
# `files` comes from os.listdir(), which returns entries in arbitrary order,
# so a positional slice such as files[2:3] can select a different newspaper
# on another run or mount.
selected_files1 = [f for f in files if f.lower().startswith('daily_times')]
print("Selected files:", selected_files1)

if not selected_files1:
    print("Warning: no file starting with 'daily_times' found in", data_path)

# Start from an empty dict so only the frames loaded in this cell are present.
dataframes = {}

for file in selected_files1:
    file_path = os.path.join(data_path, file)
    # Frame name is the file name without its extension.
    df_name = os.path.splitext(file)[0]
    try:
        # load CSV; index_col=False stops pandas from using the first column
        # as the index
        df = pd.read_csv(file_path, encoding="latin-1", index_col=False)

        # remove unnamed columns (those whose header starts with 'Unnamed')
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # store cleaned dataframe
        dataframes[df_name] = df
        print(f"Loaded {file} (cleaned unnamed columns)")
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        print(f"Error loading {file}: {e}")

# display summary
for name, df in dataframes.items():
    print(f"\nDataFrame: {name}")
    display(df.head())
    print(f"Columns: {df.columns.tolist()}")


Selected files: ['daily_times(full-data).csv']


  df = pd.read_csv(file_path, encoding="latin-1", index_col=False)


Loaded daily_times(full-data).csv (cleaned unnamed columns)

DataFrame: daily_times(full-data)


Unnamed: 0,headline,date,link,source,categories,description
0,Two abductees recovered after encounter in Kha...,1/1/2020 0:47,https://dailytimes.com.pk/530409/two-abductees...,Daily Times,Pakistan,Shikarpur police have succeeded in getting two...
1,NUMS holds PMC NEB Step-II examination,1/1/2020 0:53,https://dailytimes.com.pk/530410/nums-holds-pm...,Daily Times,Pakistan,After the successful conduct of four NEB exami...
2,Team Cell hosts advanced training Bootcamp for...,1/1/2020 0:59,https://dailytimes.com.pk/530411/team-cell-hos...,Daily Times,Pakistan,"Team Cell hosted Islamabad Semester for SMEs, ..."
3,"The City School, Rijas Aces Property team up t...",1/1/2020 1:10,https://dailytimes.com.pk/530414/the-city-scho...,Daily Times,Pakistan,The City School Group Pvt Ltd. is venturing in...
4,Senate body discusses ecological and aquatic e...,1/1/2020 1:14,https://dailytimes.com.pk/530412/senate-body-d...,Daily Times,Pakistan,Senate Functional Committee on Human Rights in...


Columns: ['headline', 'date', 'link', 'source', 'categories', 'description']


In [7]:
all_unique_categories = set()

for df_name, df in dataframes.items():
    if 'categories' not in df.columns:
        continue

    # Lowercase the column in place; astype(str) renders missing values as 'nan'.
    df['categories'] = df['categories'].astype(str).str.lower()

    # Break every distinct label on commas and trim surrounding whitespace.
    trimmed_pieces = (
        piece.strip()
        for label in df['categories'].dropna().unique()
        for piece in label.split(',')
    )

    # Drop empty pieces and the 'nan' placeholder for missing values.
    all_unique_categories.update(
        piece for piece in trimmed_pieces if piece and piece != 'nan'
    )

print("Unique Categories:")
for category in sorted(all_unique_categories):
    print(f"- {category}")

Unique Categories:
- arts
- balochistan
- balochistan & business
- balochistan & business & pakistan & pakistan
- balochistan & business & pakistan & pakistan & top stories
- balochistan & business & pakistan & top stories
- balochistan & education
- balochistan & health
- balochistan & health & pakistan
- balochistan & islamabad & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories
- balochistan & islamabad & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories
- balochistan & islamabad & pakistan & pakistan
- balochistan & islamabad & punjab & sindh
- balochistan & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories
- balochistan & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories
- balochistan & pakistan
- balochistan & pakistan & pakistan
- balochistan & pakistan & pakistan & top stories
- balochistan & pakistan & sindh
- balochistan & pakistan & top stories
- balochistan & reviews
- balochistan & sindh
- balochistan & spor

In [8]:
# Broad labels that the raw category strings are collapsed into
# (these are the values used in category_mapping).
broad_categories = [
    'Business',
    'Politics',
    'National',
    'Sports',
    'Opinion',
    'Entertainment',
    'Technology',
    'Health',
    'Food',
    'World',
    'Other',
]

category_mapping = {
    'Business': 'Business', 'Pakistan, Business': 'Business', 'Business, World': 'Business',
    'Balochistan, Business': 'Business', 'Punjab, Business': 'Business', 'Business, Technology': 'Business',
    'Business, K-P': 'Business', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business', 'Pakistan, Business, Islamabad': 'Business',
    'GOVERNANCE': 'Politics', 'GOVERNANCE & HEADLINES': 'Politics',
    'GOVERNANCE & HEADLINES & Top Headlines': 'Politics', 'GOVERNANCE & NATIONAL': 'Politics',
    'GOVERNANCE & HEADLINES & NATIONAL': 'Politics', 'POLITICS': 'Politics',
    'POLITICS, Pakistan': 'Politics', 'POLITICS, World': 'Politics', 'Pakistan, POLITICS': 'Politics',
    'NATIONAL': 'National', 'ISLAMABAD': 'National', 'KARACHI': 'National', 'LAHORE': 'National',
    'PESHAWAR': 'National', 'MULTAN': 'National', 'CITY': 'National', 'Rawalpindi': 'National',
    'Pakistan': 'National', 'Punjab': 'National', 'Sindh': 'National', 'K-P': 'National',
    'Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Azad Jammu & Kashmir': 'National',
    'Sports': 'Sports', 'Sports & Top Headlines': 'Sports', 'Sports & World': 'Sports',
    'Sports, K-P': 'Sports', 'Sports, Pakistan': 'Sports', 'Sports, TV': 'Sports',
    'Sports, Hockey': 'Sports', 'Sports, Cricket': 'Sports', 'Football': 'Sports', 'Tennis': 'Sports',
    'Sports, Punjab': 'Sports', 'Sports, Life & Style': 'Sports', 'Sports, Multan, Cities': 'Sports',
    'Editorials': 'Opinion', 'Editorials & Letters & Opinion': 'Opinion', 'Comment': 'Opinion',
    'Comment & Opinion': 'Opinion', 'Letters': 'Opinion', 'Opinion': 'Opinion', 'Cartoon & Opinion': 'Opinion',
    'Comment & HEADLINES & Opinion': 'Opinion', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'Entertainment': 'Entertainment', 'Life & Style': 'Entertainment', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Music': 'Entertainment', 'Life & Style, TV': 'Entertainment', 'Life & Style, Gossip': 'Entertainment',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Food': 'Entertainment',
    'Life & Style, Art and Books': 'Entertainment', 'Life & Style, Theatre': 'Entertainment',
    'Life & Style, Spotlight': 'Entertainment', 'Life & Style, Bollywood': 'Entertainment',
    'Life & Style, K-P': 'Entertainment', 'Life & Style, Health': 'Entertainment',
    'Technology': 'Technology', 'World, Technology': 'Technology', 'Technology, Business': 'Technology',
    'Technology, Sports': 'Technology', 'Technology, Food': 'Technology', 'Technology, Games': 'Technology',
    'Technology, Life & Style': 'Technology', 'Technology, Pakistan': 'Technology',
    'Health': 'Health', 'Pakistan, Health': 'Health', 'Sindh, Health': 'Health',
    'Jammu & Kashmir, Health': 'Health', 'Health, ADVICE': 'Health', 'Food, ADVICE, Health': 'Health',
    'Health, Life & Style': 'Health', 'Health, Latest': 'Health', 'Food': 'Food', 'Food, Spotlight': 'Food',
    'Technology, Food': 'Food',
    'World': 'World', 'Pakistan, World': 'World', 'World, Jammu & Kashmir': 'World', 'World, Sports': 'World',
    'World, Gilgit Baltistan': 'World', 'World, Life & Style': 'World', 'World, Music': 'World',
    'World, Fashion': 'World', 'World, Health': 'World', 'World, K-P': 'World', 'World, archives': 'World',
    'World, Videos': 'World', 'World, Opinion': 'World', 'World, Food, Technology': 'World', 'World, Bollywood': 'World',
    'World, Newslab': 'World', 'Pakistan, World, Islamabad': 'World', 'Pakistan, Islamabad, World, Cities': 'World',
    'Pakistan, World, Balochistan': 'World', 'Pakistan, Azad Jammu & Kashmir, World': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World',

    'Uncategorized': 'Other', 'SPONSORED': 'Other', 'Sponsored Content': 'Other',
    'NATIVE CONTENT': 'Other', 'Archives': 'Other', 'Slideshows, World': 'Other', 'Latest': 'Other',
    'Multimedia': 'Other', 'T.Edit': 'Other', 'NATIVE CONTENT & Pakistan Today': 'Other',
    'HEADLINES': 'Other', 'HEADLINES & Top Headlines': 'Other', 'FEATURED & top Featured': 'Other',
    'FEATURED & Top Non Business': 'Other', 'FEATURED': 'Other', 'E-papers & Profit Magazine': 'Other',
    'E-papers & Pakistan Today': 'Other', 'Top Headlines': 'Other', 'NATIONAL & Top Non Business': 'Other',
    'Editorials & Opinion': 'Opinion', 'Letters & Opinion': 'Opinion', 'NATIONAL & Top Headlines': 'Other',
    'NATIONAL & World': 'World', 'Top Non Business & World': 'World', 'LAHORE & NATIONAL': 'National',
    'Analysis & HEADLINES': 'Other', 'HEADLINES & NATIONAL': 'Other', 'CITY & LAHORE': 'National',
    'HEADLINES & NATIONAL & Top Headlines': 'Other', 'Top Headlines & World': 'World', 'KARACHI & NATIONAL': 'National',
    'NATIONAL & PESHAWAR': 'National', 'HEADLINES & KARACHI': 'Other', 'HEADLINES & LAHORE': 'Other',
    'HEADLINES & PESHAWAR': 'Other', 'INTERVIEW & Top Headlines': 'Other', 'ISLAMABAD & NATIONAL': 'National',
    'CITY & HEADLINES & LAHORE': 'Other', 'HEADLINES & Sports': 'Sports', 'KARACHI & LAHORE & NATIONAL': 'National',
    'KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Non Business': 'Other', 'FEATURED & Top Headlines': 'Other',
    'NATIONAL & Sports': 'Sports', 'KARACHI & NATIONAL & Top Headlines': 'National', 'CITY & ISLAMABAD': 'National',
    'HEADLINES & Top Headlines & World': 'World', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'HEADLINES & LAHORE & NATIONAL': 'Other', 'Analysis & HEADLINES & NATIONAL & Top Headlines': 'Other',
    'Book Review': 'Other', 'LAHORE & NATIONAL & Top Headlines': 'National',
    'HEADLINES & LAHORE & NATIONAL & Top Headlines': 'Other', 'CITY & KARACHI & NATIONAL': 'National',
    'LAHORE & NATIONAL & PESHAWAR': 'National', 'Comment & Editorials': 'Opinion',
    'HEADLINES & Sports & Top Headlines': 'Sports', 'Book Review & E-papers & Pakistan Today': 'Other',
    'CITY & HEADLINES & NATIONAL': 'Other', 'CITY & NATIONAL': 'National', 'FEATURED & NATIONAL': 'Other',
    'ISLAMABAD & KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Headlines & World': 'World',
    'LAHORE & Top Headlines': 'Other', 'HEADLINES & ISLAMABAD & NATIONAL': 'Other',
    'HEADLINES & NATIONAL & Sports & Top Headlines': 'Sports', 'NATIONAL & Sports & Top Headlines': 'Sports',
    'Analysis & E-papers & Pakistan Today': 'Other', 'CITY & HEADLINES': 'Other', 'Sports & World': 'Sports',
    'HEADLINES & ISLAMABAD & LAHORE & NATIONAL': 'Other', 'CITY & FEATURED & LAHORE': 'Other',
    'NATIONAL & Opinion': 'Opinion', 'Entertainment & World': 'Entertainment',
    'HEADLINES & ISLAMABAD & NATIONAL & Pakistan Today': 'Other',
    'NATIONAL & Sports & Top Headlines & Top Non Business': 'Sports', 'HEADLINES & KARACHI & NATIONAL': 'Other',
    'OIC & World': 'World', 'Agriculture & NATIONAL': 'National', 'NATIONAL & top Featured': 'Other',
    'CITY & Education & MULTAN': 'National', 'Cartoon & Letters': 'Opinion', 'FEATURED & HEADLINES & NATIONAL': 'Other',
    'FEATURED & Sports': 'Sports', 'Comment & Letters': 'Opinion', 'ISLAMABAD & SPONSORED': 'Other',
    'top Featured & World': 'World', 'HEADLINES & NATIONAL & top Featured': 'Other',
    'NATIONAL & Top Headlines & World': 'World', 'NATIONAL & top Featured & World': 'World',
    'MULTAN & NATIONAL': 'National', 'NATIONAL & top Featured & Top Headlines': 'Other',
    'NATIONAL & PESHAWAR & Top Headlines': 'National', 'CITY & PESHAWAR': 'National',
    'HEADLINES & Letters & Opinion': 'Opinion', 'CITY & LAHORE & Opinion': 'Opinion',
    'ISLAMABAD & Pakistan Today': 'Other', 'top Featured & Top Headlines': 'Other', 'top Featured': 'Other',
    'PESHAWAR & Sports': 'Sports', 'Sports & top Featured': 'Sports', 'E-papers': 'Other',
    'CITY & ISLAMABAD & NATIONAL': 'National', 'Comment & NATIONAL & Opinion': 'Opinion',
    'Book Review & NATIONAL': 'Other', 'Entertainment & NATIONAL': 'Entertainment', 'Analysis & NATIONAL': 'Other',
    'NATIONAL & Pakistan Today': 'Other', 'Cartoon & Editorials': 'Opinion', 'CITY & LAHORE & NATIONAL': 'National',
    'Education & NATIONAL': 'National', 'NATIVE CONTENT & Pakistan Today': 'Other',

    'Pakistan, Punjab': 'National', 'Pakistan, K-P': 'National', 'Life & Style, Film, Gossip': 'Entertainment',
    'Life & Style, TV': 'Entertainment', 'Sindh': 'National', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Gossip': 'Entertainment', 'Life & Style, Music': 'Entertainment', 'Punjab': 'National', 'K-P': 'National',
    'Editorial': 'Opinion', 'Balochistan': 'National', 'Sindh, Health': 'Health', 'Pakistan, Life & Style': 'Entertainment',
    'Jammu & Kashmir, Health': 'Health', 'Pakistan, Sindh': 'National', 'K-P, Music': 'Entertainment',
    'Life & Style': 'Entertainment', 'Pakistan, Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Jammu & Kashmir': 'National',
    'Life & Style, Art and Books, Music': 'Entertainment', 'Life & Style, Fashion, Gossip': 'Entertainment',
    'Life & Style, Music, Gossip': 'Entertainment', 'Pakistan, Jammu & Kashmir': 'National', 'Sindh, Punjab': 'National',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Film, TV': 'Entertainment', 'Balochistan, Business': 'Business',
    'Life & Style, Health': 'Health', 'Punjab, Business': 'Business', 'Music, Film': 'Entertainment', 'TV': 'Entertainment',
    'Life & Style, Music, Food': 'Entertainment', 'Pakistan, Health': 'Health', 'Balochistan, K-P': 'National',
    'Sindh, Technology': 'Technology', 'Film': 'Entertainment', 'Sindh, Life & Style, Music': 'Entertainment',
    'Life & Style, Gossip, TV': 'Entertainment', 'Life & Style, Art and Books': 'Entertainment', 'K-P, Technology': 'Technology',
    'Magazine': 'Other', 'Film, Gossip': 'Entertainment', 'Life & Style, Theatre': 'Entertainment', 'Business, Technology': 'Business',
    'Balochistan, Gilgit Baltistan': 'National', 'K-P, Health': 'Health', 'Pakistan, Gilgit Baltistan': 'National',
    'Life & Style, Film, Fashion': 'Entertainment', 'Fashion': 'Entertainment', 'Punjab, World': 'World',
    'Pakistan, Sports': 'Sports', 'Pakistan, Technology': 'Technology', 'Balochistan, Health': 'Health',
    'Pakistan, Sindh, Art and Books': 'Entertainment', 'Life & Style, Fashion, TV': 'Entertainment', 'TV, Theatre': 'Entertainment',
    'Life & Style, Food': 'Food', 'Pakistan, Film': 'Entertainment', 'Health': 'Health', 'World, K-P': 'World',
    'Life & Style, K-P': 'Entertainment', 'Art and Books': 'Entertainment', 'Opinion, Health': 'Opinion',
    'Life & Style, Music, TV': 'Entertainment', 'Sports, Life & Style': 'Sports', 'Sindh, Jammu & Kashmir': 'National',
    'Sindh, Business': 'Business', 'Life & Style, Film, Theatre': 'Entertainment', 'Sindh, Sports': 'Sports',
    'archives': 'Other', 'Music, Health': 'Health', 'Punjab, Technology': 'Technology', 'Punjab, Health': 'Health',
    'Life & Style, Food, Gossip': 'Entertainment', 'Life & Style, Food, Health': 'Health', 'Sports, K-P': 'Sports',
    'Sindh, Life & Style': 'Entertainment', 'Life & Style, Music, Theatre': 'Entertainment', 'Sports, Videos': 'Sports',
    'Opinion, Technology': 'Opinion', 'Sindh, World': 'World', 'Pakistan, K-P, Health': 'Health',
    'Pakistan, Sindh, Punjab': 'National', 'Life & Style, Opinion': 'Opinion', 'Balochistan, Life & Style': 'Entertainment',
    'Life & Style, Art and Books, Film': 'Entertainment', 'Pakistan, Opinion': 'Opinion', 'Punjab, Jammu & Kashmir': 'National',
    'Punjab, Sports': 'Sports', 'Life & Style, Technology': 'Technology', 'Gossip': 'Entertainment',
    'Life & Style, Music, Film': 'Entertainment', 'Life & Style, Health, TV': 'Health', 'Technology, Games': 'Technology',
    'Pakistan, World, Jammu & Kashmir': 'World', 'Life & Style, Music, Fashion': 'Entertainment',
    'Life & Style, Art and Books, Health': 'Health', 'Sindh, Videos': 'Other', 'Punjab, Food': 'Food',
    'Life & Style, Film, Health': 'Health', 'Sports, Multan, Cities': 'Sports', 'Music, Fashion': 'Entertainment',
    'Videos': 'Other', 'K-P, Art and Books': 'Entertainment', 'Music, Gossip': 'Entertainment',
    'Business, Jammu & Kashmir': 'Business', 'Sindh, Balochistan': 'National', 'Opinion, Editorial': 'Opinion',
    'Pakistan, archives': 'Other', 'Jammu & Kashmir, Gilgit Baltistan': 'National', 'Punjab, K-P': 'National',
    'Business, K-P': 'Business', 'Life & Style, Fashion, Health': 'Health', 'World, Azad Jammu & Kashmir': 'World',
    'Life &amp; Style, TV': 'Entertainment', 'Sindh, Business, Health': 'Business', 'Sports, Business': 'Sports',
    'Punjab, Business, Lahore, Cities': 'Business', 'Punjab, Life & Style, Gossip': 'Entertainment',
    'Life & Style, Art and Books, TV': 'Entertainment', 'Sindh, Karachi, Cities': 'National', 'Sindh, archives': 'Other',
    'Pakistan, Islamabad': 'National', 'life and style': 'Entertainment', 'life and style, Music': 'Entertainment',
    'Islamabad': 'National', 'Pakistan, Health, Food': 'Health', 'Pakistan, Khyber-Pakhtunkhwa': 'National',
    'Music, Film, Theatre': 'Entertainment', 'Music': 'Entertainment', 'Cricket': 'Sports',
    'Film, Gossip, Bollywood': 'Entertainment', 'Khyber-Pakhtunkhwa': 'National', 'Sindh, Karachi': 'National',
    'Khyber-Pakhtunkhwa, Pakistan': 'National', 'World, Azad Jammu & Kashmir, Cities': 'World',
    'Pakistan, Islamabad, Cities': 'National', 'Gossip, TV, Music': 'Entertainment',
    'Music, Pakistan, Life & Style': 'Entertainment', 'Bollywood, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir': 'National', 'Art and Books, Film, Games': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Cities': 'National', 'Sindh, Hyderabad': 'National', 'Sindh, Cities': 'National',
    'Pakistan, Sindh, Cities': 'National', 'Art and Books, Film': 'Entertainment', 'Pakistan, Lahore': 'National',
    'Sports, TV, Gossip': 'Sports', 'Punjab, Pakistan, Lahore': 'National', 'Games': 'Other',
    'Khyber-Pakhtunkhwa, Swat': 'National', 'Pakistan, Sindh, Karachi, Business': 'Business',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National', 'Football': 'Sports', 'Pakistan, Peshawar': 'National',
    'TV, Sports': 'Sports', 'Khyber-Pakhtunkhwa, Cities, Peshawar': 'National', 'Rawalpindi': 'National',
    'Slideshows, World': 'World', 'Pakistan, Azad Jammu & Kashmir, Cities': 'National',
    'Pakistan, Cities, Khyber Pakhtunkhwa': 'National', 'Gossip, Film': 'Entertainment', 'Health, ADVICE': 'Health',
    'TV, Film': 'Entertainment', 'Pakistan, Lahore, Cities': 'National', 'Pakistan, Gwadar, Cities': 'National',
    'Rawalpindi, Punjab': 'National', 'Punjab, Rawalpindi': 'National', 'Khyber-Pakhtunkhwa, Peshawar': 'National',
    'Punjab, Lahore': 'National', 'Pakistan, Balochistan, Cities': 'National', 'Sindh, khairpur': 'National',
    'Islamabad, Pakistan, Cities': 'National', 'Pakistan, Khyber-Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Karachi': 'National', 'Khyber-Pakhtunkhwa, Abbottabad': 'National', 'TV, Gossip': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Gilgit-Baltistan': 'National', 'World, Pakistan': 'World',
    'Khyber-Pakhtunkhwa, Cities': 'National', 'Fashion, Gossip': 'Entertainment', 'Islamabad, World': 'World',
    'Punjab, Multan': 'National', 'Punjab, Faisalabad': 'National', 'Pakistan, Islamabad, Sindh': 'National',
    'Pakistan, Karachi, Cities': 'National', 'Islamabad, Business': 'Business', 'Sindh, Pakistan': 'National',
    'Pakistan, Islamabad, Punjab': 'National', 'Pakistan, Sindh, Karachi, Cities': 'National',
    'Pakistan, Cities, Azad Jammu & Kashmir': 'National', 'Health, Life & Style, ADVICE': 'Health',
    'Islamabad, Balochistan': 'National', 'Pakistan, Gilgit-Baltistan': 'National', 'Pakistan, Punjab, Cities': 'National',
    'Sindh, tharparkar': 'National', 'Business, Gilgit-Baltistan': 'Business', 'Khyber Pakhtunkhwa': 'National',
    'Film, TV, Life & Style, life and style': 'Entertainment', 'TV, Film, Life & Style': 'Entertainment',
    'Film, Life & Style': 'Entertainment', 'Pakistan, Cities, Lahore': 'National',
    'Life & Style, Gossip, Film': 'Entertainment', 'Film, Art and Books': 'Entertainment', 'Food, ADVICE, Health': 'Health',
    'Gossip, Fashion': 'Entertainment', 'Gossip, Life & Style': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Mardan': 'National', 'Pakistan, Punjab, Lahore': 'National',
    'Life & Style, Gossip, Fashion': 'Entertainment', 'Pakistan, Cities': 'National',
    'Islamabad, Rawalpindi': 'National', 'Islamabad, Pakistan': 'National', 'Pakistan, Islamabad, World': 'World',
    'Pakistan, Sindh, Nawabshah': 'National', 'Balochistan, Islamabad, Pakistan': 'National', 'Fashion, Life & Style': 'Entertainment',
    'Health, ADVICE, Life & Style': 'Health', 'Music, Life & Style': 'Entertainment',
    'Pakistan, Peshawar, Islamabad, Cities': 'National', 'Punjab, Gilgit-Baltistan': 'National', 'Health, World': 'Health',
    'Gossip, Life & Style, Music': 'Entertainment', 'Gossip, Music': 'Entertainment', 'Gossip, Film, TV': 'Entertainment',
    'Technology, Life & Style': 'Technology', 'Sindh, Pakistan, Karachi, Cities': 'National',
    'Gossip, TV, Film, Life & Style': 'Entertainment', 'Pakistan, World, Islamabad, Cities': 'World',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'Sindh, Nawabshah': 'National', 'Azad Jammu & Kashmir, Pakistan': 'National',
    'TV, Gossip, Life & Style': 'Entertainment', 'Film, TV, Life & Style': 'Entertainment',
    'Khyber Pakhtunkhwa, Peshawar, Sports': 'Sports', 'Film, Gossip, Life & Style': 'Entertainment',
    'Rawalpindi, Islamabad': 'National', 'Sindh, sukkur': 'National', 'Pakistan, World, Islamabad': 'World',
    'Pakistan, Sindh, Karachi, Islamabad': 'National', 'Business, Pakistan': 'Business',
    'ADVICE, Life & Style, Film': 'Entertainment', 'Film, Life & Style, TV, Gossip': 'Entertainment', 'Music, TV': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Islamabad': 'National', 'Music, Gossip, Life & Style': 'Entertainment',
    'Islamabad, Cities, Pakistan': 'National', 'Islamabad, Punjab': 'National', 'Health, Life & Style': 'Health',
    'Technology, World': 'Technology', 'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Hyderabad': 'National', 'Life & Style, TV, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'ADVICE, Life & Style': 'Health', 'Pakistan, Sindh, Balochistan': 'National',
    'Gossip, Life & Style, TV': 'Entertainment', 'Music, Film, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Islamabad': 'National', 'Pakistan, World, Azad Jammu & Kashmir': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World', 'Rawalpindi, Pakistan, Business': 'Business',
    'Khyber Pakhtunkhwa, Peshawar': 'National', 'Sindh, dadu': 'National', 'Fashion, Gossip, Life & Style': 'Entertainment',
    'Pakistan, Karachi, Sindh': 'National', 'Pakistan, Balochistan, Quetta': 'National', 'TV, Life & Style': 'Entertainment',
    'Pakistan, Islamabad, World, Cities': 'World', 'Pakistan, Khyber Pakhtunkhwa, Cities': 'National',
    'Pakistan, Karachi, Sindh, Cities': 'National', 'Balochistan, Quetta': 'National', 'Pakistan, Rawalpindi': 'National',
    'Punjab, Film': 'Entertainment', 'Khyber Pakhtunkhwa, Swat': 'National', 'Balochistan, Gwadar': 'National',
    'TV, Life & Style, Gossip': 'Entertainment', 'Sindh, Karachi, Pakistan, Cities': 'National',
    'Pakistan, Sindh, Islamabad': 'National', 'Pakistan, Punjab, Islamabad, Cities': 'National',
    'Khyber Pakhtunkhwa, Nowshera': 'National', 'TV, Film, Gossip': 'Entertainment', 'Abbottabad': 'National',
    'Pakistan, Karachi, Peshawar': 'National', 'Pakistan, Khyber Pakhtunkhwa': 'National', 'Sports, Hockey': 'Sports',
    'Art and Books, Life & Style': 'Entertainment', 'Pakistan, World, Balochistan': 'World', 'Theatre': 'Entertainment',
    'Gossip, Film, Life & Style': 'Entertainment', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business',
    'Film, TV, Gossip, Life & Style': 'Entertainment', 'Life & Style, Sports': 'Sports',
    'Music, Art and Books, Life & Style, Film': 'Entertainment', 'Pakistan, Sindh, Karachi, Hyderabad, Cities': 'National',
    'Life & Style, ADVICE': 'Health',
    'Pakistan, Sindh, Punjab, Azad Jammu & Kashmir, Gilgit-Baltistan, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Karachi': 'National', 'Fashion, Music': 'Entertainment', 'Film, TV': 'Entertainment',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra, Nowshera, Charsadda': 'National', 'Fashion, Gossip, TV': 'Entertainment',
    'Karachi, Pakistan': 'National', 'Pakistan, Azad Jammu & Kashmir, Muzaffarabad': 'National',
    'Pakistan, Azad Jammu & Kashmir, World': 'World', 'Film, Music, Gossip': 'Entertainment',
    'Pakistan, Punjab, Rawalpindi': 'National', 'Pakistan, Lahore, Punjab': 'National', 'World, Newslab': 'World',
    'Karachi': 'National', 'Pakistan, Islamabad, Azad Jammu & Kashmir': 'National', 'Gossip, Fashion, Life & Style': 'Entertainment',
    'Pakistan, Rawalpindi, Karachi': 'National', 'Art and Books, Gossip, Life & Style': 'Entertainment',
    'Film, Theatre, Life & Style': 'Entertainment', 'Pakistan, Punjab, Islamabad': 'National',
    'Gilgit-Baltistan, Pakistan': 'National',
    'World, Business': 'Business',
    'Gilgit-Baltistan': 'National',
    'Ù¾Ø§Ú©Ø³Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',

    # New mappings for 'daily_times' dataset
    'arts, culture &amp; books': 'Entertainment',
    'arts, culture &amp; books & blog': 'Entertainment',
    'arts, culture &amp; books & culture': 'Entertainment',
    'arts, culture &amp; books & education': 'Entertainment',
    'arts, culture &amp; books & entertainment': 'Entertainment',
    'arts, culture &amp; books & entertainment & world': 'Entertainment',
    'arts, culture &amp; books & featured': 'Entertainment',
    'arts, culture &amp; books & featured & featured': 'Entertainment',
    'arts, culture &amp; books & health': 'Entertainment',
    'arts, culture &amp; books & lifestyle': 'Entertainment',
    'arts, culture &amp; books & lifestyle & pakistan': 'Entertainment',
    'arts, culture &amp; books & pakistan': 'Entertainment',
    'arts, culture &amp; books & perspectives': 'Entertainment',
    'arts, culture &amp; books & top stories': 'Entertainment',
    'arts, culture &amp; books & trending': 'Entertainment',
    'arts, culture &amp; books & world': 'Entertainment',
    'balochistan & business': 'Business',
    'balochistan & business & pakistan & pakistan': 'Business',
    'balochistan & business & pakistan & pakistan & top stories': 'Business',
    'balochistan & business & pakistan & top stories': 'Business',
    'balochistan & education': 'National',
    'balochistan & health': 'Health',
    'balochistan & health & pakistan': 'Health',
    'balochistan & islamabad & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & islamabad & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & islamabad & pakistan & pakistan': 'National',
    'balochistan & islamabad & punjab & sindh': 'National',
    'balochistan & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & pakistan': 'National',
    'balochistan & pakistan & pakistan': 'National',
    'balochistan & pakistan & pakistan & top stories': 'National',
    'balochistan & pakistan & sindh': 'National',
    'balochistan & pakistan & top stories': 'National',
    'balochistan & reviews': 'National',
    'balochistan & sindh': 'National',
    'balochistan & sports': 'Sports',
    'balochistan & top stories': 'National',
    'blog': 'Other',
    'blog & blogs': 'Other',
    'blog & lifestyle & pakistan': 'Entertainment',
    'blogs': 'Other',
    'blogs & culture': 'Other',
    'blogs & featured': 'Other',
    'blogs & gilgit baltistan & sports': 'Sports',
    'blogs & lifestyle': 'Other',
    'blogs & lifestyle & uncategorized': 'Other',
    'blogs & pakistan': 'Other',
    'blogs & perspectives': 'Other',
    'blogs & sports': 'Sports',
    'blogs & trending': 'Other',
    'business & education & pakistan & trending': 'Business',
    'business & featured & pakistan': 'Business',
    'business & finance': 'Business',
    'business & finance & international': 'Business',
    'business & finance & international & pakistan': 'Business',
    'business & finance & pakistan': 'Business',
    'business & finance & pakistan & top stories': 'Business',
    'business & finance & pakistan & uncategorized': 'Business',
    'business & finance & top stories': 'Business',
    'business & health & pakistan': 'Business',
    'business & international': 'Business',
    'business & international & pakistan': 'Business',
    'business & islamabad': 'Business',
    'business & islamabad & pakistan': 'Business',
    'business & islamabad & pakistan & pakistan': 'Business',
    'business & islamabad & pakistan & pakistan & top stories': 'Business',
    'business & khyber pakhtunkhwa': 'Business',
    'business & khyber pakhtunkhwa & pakistan': 'Business',
    'business & lahore & pakistan & punjab': 'Business',
    'business & lifestyle & pakistan': 'Business',
    'business & pakistan': 'Business',
    'business & pakistan & pakistan': 'Business',
    'business & pakistan & pakistan & punjab': 'Business',
    'business & pakistan & pakistan & punjab & top stories': 'Business',
    'business & pakistan & pakistan & sindh': 'Business',
    'business & pakistan & pakistan & sindh & top stories': 'Business',
    'business & pakistan & pakistan & top stories': 'Business',
    'business & pakistan & punjab': 'Business',
    'business & pakistan & punjab & top stories': 'Business',
    'business & pakistan & science and technology': 'Business',
    'business & pakistan & sindh': 'Business',
    'business & pakistan & sindh & top stories': 'Business',
    'business & pakistan & sports': 'Business',
    'business & pakistan & top stories': 'Business',
    'business & pakistan & trending & world': 'Business',
    'business & pakistan & uncategorized': 'Business',
    'business & pakistan & world': 'Business',
    'business & perspectives': 'Business',
    'business & punjab': 'Business',
    'business & reviews': 'Business',
    'business & science and technology & tgif & world': 'Business',
    'business & sindh': 'Business',
    'business & sindh & top stories': 'Business',
    'business & sponsored content': 'Business',
    'business & sports': 'Business',
    'business & top stories': 'Business',
    'business & top stories & trending & uncategorized & world': 'Business',
    'business & top stories & trending & world': 'Business',
    'business & top stories & world': 'Business',
    'business & trending': 'Business',
    'business & trending & world': 'Business',
    'business & uncategorized': 'Business',
    'business & world': 'Business',
    'cartoons': 'Opinion',
    'celebrity interviews & featured & lifestyle': 'Entertainment',
    'celebrity interviews & international & lifestyle': 'Entertainment',
    'celebrity interviews & lifestyle': 'Entertainment',
    'celebrity interviews & lifestyle & pakistan': 'Entertainment',
    'commentary / insight': 'Opinion',
    'commentary / insight & pakistan': 'Opinion',
    'commentary / insight & trending': 'Opinion',
    'culture': 'Entertainment',
    'culture & infotainment & pakistan': 'Entertainment',
    'culture & pakistan': 'Entertainment',
    'culture & world': 'Entertainment',
    'education': 'Other',
    'education & international': 'Other',
    'education & international & world': 'Other',
    'education & lahore & pakistan': 'National',
    'education & pakistan': 'National',
    'education & pakistan & punjab': 'National',
    'education & pakistan & top stories': 'National',
    'education & pakistan & trending & world': 'National',
    'education & punjab': 'National',
    'education & top stories': 'Other',
    'entertainment & fashion & lifestyle': 'Entertainment',
    'entertainment & film and drama reviews': 'Entertainment',
    'entertainment & gossip & lifestyle': 'Entertainment',
    'entertainment & infotainment': 'Entertainment',
    'entertainment & infotainment & international': 'Entertainment',
    'entertainment & infotainment & international & science and technology & social mania & world': 'Entertainment',
    'entertainment & international': 'Entertainment',
    'entertainment & international & lifestyle': 'Entertainment',
    'entertainment & karachi & pakistan & sports & top stories': 'Entertainment',
    'entertainment & karachi & sindh': 'Entertainment',
    'entertainment & lifestyle': 'Entertainment',
    'entertainment & lifestyle & lifestyle': 'Entertainment',
    'entertainment & lifestyle & movies': 'Entertainment',
    'entertainment & lifestyle & music': 'Entertainment',
    'entertainment & lifestyle & pakistan': 'Entertainment',
    'entertainment & lifestyle & trending': 'Entertainment',
    'entertainment & lifestyle & tv': 'Entertainment',
    'entertainment & lifestyle & uncategorized': 'Entertainment',
    'entertainment & lifestyle & world': 'Entertainment',
    'entertainment & music': 'Entertainment',
    'entertainment & music & uncategorized': 'Entertainment',
    'entertainment & pakistan': 'Entertainment',
    'entertainment & pakistan & top stories': 'Entertainment',
    'entertainment & pakistan & trending': 'Entertainment',
    'entertainment & science and technology & top stories': 'Entertainment',
    'entertainment & top stories': 'Entertainment',
    'entertainment & trending': 'Entertainment',
    'entertainment & trending & world': 'Entertainment',
    'entertainment & tv': 'Entertainment',
    'fashion & featured & gossip & lifestyle & trending': 'Entertainment',
    'fashion & gilgit baltistan': 'Entertainment',
    'fashion & international & lifestyle': 'Entertainment',
    'fashion & lifestyle': 'Entertainment',
    'fashion & lifestyle & pakistan': 'Entertainment',
    'fashion & lifestyle & trending': 'Entertainment',
    'featured & gilgit baltistan & pakistan & pakistan & trending': 'Other',
    'featured & international & lifestyle': 'Other',
    'featured & lahore': 'National',
    'featured & lifestyle': 'Other',
    'featured & op-ed': 'Other',
    'featured & pakistan': 'National',
    'featured & pakistan & punjab': 'National',
    'featured & pakistan & top stories': 'National',
    'featured & pakistan & trending & uncategorized': 'Other',
    'featured & pakistan & world': 'World',
    'featured & uncategorized': 'Other',
    'featured & world': 'World',
    'film and drama reviews': 'Entertainment',
    'film and drama reviews & international & lifestyle': 'Entertainment',
    'finance': 'Business',
    'finance & pakistan': 'Business',
    'gilgit baltistan & pakistan': 'National',
    'gilgit baltistan & pakistan & punjab': 'National',
    'gilgit baltistan & pakistan & sports': 'Sports',
    'gilgit baltistan & pakistan & top stories': 'National',
    'gilgit baltistan & top stories': 'National',
    'gossip & international & lifestyle & trending': 'Entertainment',
    'gossip & lifestyle': 'Entertainment',
    'gossip & lifestyle & trending': 'Entertainment',
    'health & international': 'Health',
    'health & islamabad': 'Health',
    'health & islamabad & pakistan & pakistan': 'Health',
    'health & islamabad & pakistan & top stories': 'Health',
    'health & khyber pakhtunkhwa': 'Health',
    'health & khyber pakhtunkhwa & pakistan': 'Health',
    'health & lahore & pakistan & punjab & top stories': 'Health',
    'health & lahore & pakistan & top stories': 'Health',
    'health & lahore & top stories': 'Health',
    'health & lifestyle': 'Health',
    'health & pakistan': 'Health',
    'health & pakistan & punjab': 'Health',
    'health & pakistan & punjab & top stories': 'Health',
    'health & pakistan & sindh': 'Health',
    'health & pakistan & top stories': 'Health',
    'health & pakistan & world': 'World',
    'health & punjab': 'Health',
    'health & social mania': 'Health',
    'health & sports & top stories': 'Sports',
    'health & top stories': 'Health',
    'health & top stories & world': 'World',
    'health & world': 'World',
    'infotainment': 'Entertainment',
    'infotainment & international': 'Entertainment',
    'infotainment & international & world': 'Entertainment',
    'infotainment & lahore & pakistan': 'Entertainment',
    'infotainment & lifestyle': 'Entertainment',
    'infotainment & lifestyle & world': 'Entertainment',
    'infotainment & pakistan': 'Entertainment',
    'infotainment & science and technology': 'Technology',
    'infotainment & top stories': 'Entertainment',
    'infotainment & trending': 'Entertainment',
    'infotainment & world': 'Entertainment',
    'international': 'World',
    'international & islamabad & pakistan': 'World',
    'international & karachi & lifestyle': 'Entertainment',
    'international & kashmir': 'World',
    'international & lifestyle': 'Entertainment',
    'international & lifestyle & movies': 'Entertainment',
    'international & lifestyle & pakistan': 'Entertainment',
    'international & movies': 'Entertainment',
    'international & pakistan': 'World',
    'international & pakistan & top stories': 'World',
    'international & pakistan & world': 'World',
    'international & science and technology': 'Technology',
    'international & science and technology & world': 'Technology',
    'international & sports': 'Sports',
    'international & top stories': 'World',
    'international & top stories & world': 'World',
    'international & travel': 'World',
    'international & world': 'World',
    'international & world & world': 'World',
    'islamabad & kashmir & top stories': 'National',
    'islamabad & lahore & pakistan': 'National',
    'islamabad & lahore & pakistan & sports': 'Sports',
    'islamabad & lahore & pakistan & sports & top stories': 'Sports',
    'islamabad & lahore & pakistan & top stories': 'National',
    'islamabad & lahore & sports & top stories': 'Sports',
    'islamabad & lifestyle': 'Other',
    'islamabad & lifestyle & pakistan': 'Other',
    'islamabad & pakistan': 'National',
    'islamabad & pakistan & pakistan': 'National',
    'islamabad & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'islamabad & pakistan & pakistan & punjab & top stories': 'National',
    'islamabad & pakistan & pakistan & sindh & top stories': 'National',
    'islamabad & pakistan & pakistan & top stories': 'National',
    'islamabad & pakistan & punjab': 'National',
    'islamabad & pakistan & punjab & top stories': 'National',
    'islamabad & pakistan & sindh': 'National',
    'islamabad & pakistan & sports': 'Sports',
    'islamabad & pakistan & top stories': 'National',
    'islamabad & pakistan & trending': 'National',
    'islamabad & pakistan & world': 'World',
    'islamabad & sports': 'Sports',
    'islamabad & top stories': 'National',
    'karachi & pakistan': 'National',
    'karachi & pakistan & sindh': 'National',
    'karachi & pakistan & sindh & sports': 'Sports',
    'karachi & pakistan & sindh & top stories': 'National',
    'karachi & pakistan & top stories': 'National',
    'karachi & sindh': 'National',
    'karachi & sindh & top stories': 'National',
    'kashmir': 'National',
    'kashmir & pakistan': 'National',
    'kashmir & pakistan & top stories': 'National',
    'kashmir & pakistan & uncategorized': 'National',
    'kashmir & pakistan & world': 'World',
    'kashmir & top stories': 'National',
    'kashmir & top stories & world': 'World',
    'khyber pakhtunkhwa & pakistan': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & punjab': 'National',
    'khyber pakhtunkhwa & pakistan & sindh': 'National',
    'khyber pakhtunkhwa & pakistan & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & top stories': 'National',
    'khyber pakhtunkhwa & top stories': 'National',
    'lahore & pakistan': 'National',
    'lahore & pakistan & pakistan': 'National',
    'lahore & pakistan & punjab': 'National',
    'lahore & pakistan & punjab & top stories': 'National',
    'lahore & pakistan & sports': 'Sports',
    'lahore & pakistan & sports & top stories': 'Sports',
    'lahore & pakistan & top stories': 'National',
    'lahore & punjab': 'National',
    'lahore & punjab & top stories': 'National',
    'lahore & top stories': 'National',
    'lifestyle': 'Entertainment',
    'lifestyle & lifestyle': 'Entertainment',
    'lifestyle & movies': 'Entertainment',
    'lifestyle & movies & music & trending': 'Entertainment',
    'lifestyle & movies & pakistan': 'Entertainment',
    'lifestyle & movies & trending': 'Entertainment',
    'lifestyle & music': 'Entertainment',
    'lifestyle & music & tv': 'Entertainment',
    'lifestyle & pakistan': 'Entertainment',
    'lifestyle & pakistan & top stories': 'Entertainment',
    'lifestyle & pakistan & trending': 'Entertainment',
    'lifestyle & reviews': 'Entertainment',
    'lifestyle & science and technology & sports': 'Entertainment',
    'lifestyle & sports': 'Sports',
    'lifestyle & top stories': 'Entertainment',
    'lifestyle & top stories & uncategorized': 'Entertainment',
    'lifestyle & trending': 'Entertainment',
    'lifestyle & trending & tv': 'Entertainment',
    'lifestyle & tv': 'Entertainment',
    'lifestyle & uncategorized': 'Entertainment',
    'lifestyle & world': 'Entertainment',
    'music': 'Entertainment',
    'off-beat': 'Other',
    'off-beat & pakistan': 'Other',
    'off-beat & top stories': 'Other',
    'off-beat & top stories & world': 'Other',
    'off-beat & uncategorized': 'Other',
    'off-beat & world': 'Other',
    'op-ed': 'Opinion',
    'op-ed & pakistan': 'Opinion',
    'pakistan & pakistan': 'National',
    'pakistan & pakistan & punjab': 'National',
    'pakistan & pakistan & punjab & top stories': 'National',
    'pakistan & pakistan & sindh': 'National',
    'pakistan & pakistan & sindh & top stories': 'National',
    'pakistan & pakistan & sindh & top stories & trending': 'National',
    'pakistan & pakistan & sports': 'Sports',
    'pakistan & pakistan & top stories': 'National',
    'pakistan & pakistan & top stories & world': 'World',
    'pakistan & pakistan & uncategorized': 'Other',
    'pakistan & pakistan & world': 'World',
    'pakistan & pakistan & world & world': 'World',
    'pakistan & perspectives': 'Other',
    'pakistan & punjab': 'National',
    'pakistan & punjab & sindh': 'National',
    'pakistan & punjab & sindh & top stories': 'National',
    'pakistan & punjab & top stories': 'National',
    'pakistan & punjab & trending': 'National',
    'pakistan & punjab & world': 'World',
    'pakistan & reviews': 'Other',
    'pakistan & science and technology': 'Technology',
    'pakistan & sindh': 'National',
    'pakistan & sindh & sports': 'Sports',
    'pakistan & sindh & top stories': 'National',
    'pakistan & sindh & top stories & trending': 'National',
    'pakistan & sindh & trending': 'National',
    'pakistan & sindh & uncategorized': 'Other',
    'pakistan & sponsored content': 'Other',
    'pakistan & sports': 'Sports',
    'pakistan & sports & sports': 'Sports',
    'pakistan & sports & top stories': 'Sports',
    'pakistan & sports & uncategorized': 'Other',
    'pakistan & sports & world': 'World',
    'pakistan & top stories': 'National',
    'pakistan & top stories & travel': 'Other',
    'pakistan & top stories & trending': 'National',
    'pakistan & top stories & uncategorized': 'Other',
    'pakistan & top stories & world': 'World',
    'pakistan & travel': 'Other',
    'pakistan & trending': 'National',
    'pakistan & trending & trending': 'National',
    'pakistan & trending & trending & world': 'World',
    'pakistan & trending & uncategorized': 'Other',
    'pakistan & uncategorized': 'Other',
    'pakistan & uncategorized & world': 'World',
    'pakistan & world': 'World',
    'pakistan & world & world': 'World',
    'perspectives': 'Other',
    'perspectives & world': 'World',
    'punjab & sindh': 'National',
    'punjab & top stories': 'National',
    'punjab & trending': 'National',
    'punjab & uncategorized': 'Other',
    'region': 'National',
    'reviews': 'Other',
    'reviews & sports': 'Sports',
    'reviews & trending': 'Other',
    'sci-tec & world': 'Technology',
    'science and technology': 'Technology',
    'science and technology & top stories': 'Technology',
    'science and technology & trending': 'Technology',
    'science and technology & world': 'Technology',
    'sindh & sports': 'Sports',
    'sindh & top stories': 'National',
    'social mania & tgif & trending & world': 'Other',
    'sponsored content & trending': 'Other',
    'sports & sports': 'Sports',
    'sports & sports & uncategorized': 'Sports',
    'sports & top stories': 'Sports',
    'sports & top stories & trending': 'Sports',
    'sports & top stories & world': 'World',
    'sports & trending': 'Sports',
    'sports & uncategorized': 'Sports',
    'top stories': 'Other',
    'top stories & travel': 'Other',
    'top stories & trending & trending & world': 'World',
    'top stories & trending & world': 'World',
    'top stories & uncategorized': 'Other',
    'top stories & uncategorized & world': 'Other',
    'top stories & world': 'World',
    'top stories & world & world': 'World',
    'travel': 'Other',
    'travel & world': 'World',
    'trending': 'Other',
    'trending & trending & world': 'World',
    'trending & world': 'World',
    'uncategorized & world': 'Other',
    'world & world': 'World',
    'ânot yet provided sufficient scientific and factual grounds for its behavior': 'Other',
    'nan': 'Other' # Map 'nan' string to 'Other'
}

# Build a case-insensitive lookup by lowercasing every key of `category_mapping`.
# Keys that differ only by letter case collapse into one entry (the later one wins).
lowercase_category_mapping = {k.lower(): v for k, v in category_mapping.items()}

for name, df in dataframes.items():
    # A frame without a 'categories' column has nothing to map. Skip it here
    # rather than failing with a KeyError on the lookup below.
    if 'categories' not in df.columns:
        print(f"Skipping {name}: no 'categories' column found")
        continue

    # Normalise the raw labels in place. astype(str) turns missing values into
    # the string 'nan', which the mapping sends to 'Other'.
    df['categories'] = df['categories'].astype(str).str.lower()

    # Labels that are absent from the mapping become NaN and are counted in the report below.
    df['mapped_categories'] = df['categories'].map(lowercase_category_mapping)

# Report the distinct mapped labels and how many rows were left unmapped.
for name, df in dataframes.items():
    if 'mapped_categories' not in df.columns:
        # Frame was skipped above; there is nothing to report for it.
        continue
    unique_mapped_categories = df['mapped_categories'].unique()
    print(f"\nUnique mapped categories in {name}:")
    print(unique_mapped_categories)
    nan_count = df['mapped_categories'].isnull().sum()
    print(f"Number of NaN values in 'mapped_categories' in {name}: {nan_count}")


Unique mapped categories in daily_times(full-data):
['National' 'Other' 'Entertainment' 'Opinion' 'Business' 'World' 'Sports'
 'Health' 'Technology']
Number of NaN values in 'mapped_categories' in daily_times(full-data): 0


In [None]:
# Destination CSV shared by every frame processed below.
output_file_path = "daily_times.csv"

# Filtered frames, keyed by "<original name>_filtered".
filtered_dataframes = {}

for name, df in dataframes.items():
    # Keep only the rows whose mapped category is 'National' or 'World'.
    filtered_df = df.loc[df['mapped_categories'].isin(['National', 'World'])]

    filtered_dataframes[f"{name}_filtered"] = filtered_df

    # Each iteration writes to the same path, so only the last frame's rows stay on disk.
    filtered_df.to_csv(output_file_path, index=False)

    print(f"Filtered data saved to: {output_file_path}")
    print(f"\nFiltered DataFrame: {name}_filtered")
    display(filtered_df.head())
    print(f"Shape of filtered DataFrame: {filtered_df.shape}")


Filtered data saved to: daily_times.csv

Filtered DataFrame: Copy of Copy of daily_times(full-data)_filtered


Unnamed: 0,headline,date,link,source,categories,description,mapped_categories
0,Two abductees recovered after encounter in Kha...,1/1/2020 0:47,https://dailytimes.com.pk/530409/two-abductees...,Daily Times,pakistan,Shikarpur police have succeeded in getting two...,National
1,NUMS holds PMC NEB Step-II examination,1/1/2020 0:53,https://dailytimes.com.pk/530410/nums-holds-pm...,Daily Times,pakistan,After the successful conduct of four NEB exami...,National
2,Team Cell hosts advanced training Bootcamp for...,1/1/2020 0:59,https://dailytimes.com.pk/530411/team-cell-hos...,Daily Times,pakistan,"Team Cell hosted Islamabad Semester for SMEs, ...",National
3,"The City School, Rijas Aces Property team up t...",1/1/2020 1:10,https://dailytimes.com.pk/530414/the-city-scho...,Daily Times,pakistan,The City School Group Pvt Ltd. is venturing in...,National
4,Senate body discusses ecological and aquatic e...,1/1/2020 1:14,https://dailytimes.com.pk/530412/senate-body-d...,Daily Times,pakistan,Senate Functional Committee on Human Rights in...,National


Shape of filtered DataFrame: (82173, 7)


In [None]:
# Write `final_df` to Google Drive as daily_times.csv, leaving out the index column.
# NOTE(review): `final_df` is not assigned in the cells visible just above (they produce
# `filtered_df` / `filtered_dataframes`) — confirm it holds the daily_times data when the
# notebook is run top to bottom on a fresh kernel.
final_df.to_csv("/content/drive/MyDrive/project dataset/daily_times.csv", index=False)

In [None]:
# Mount Google Drive at /content/drive so the Drive-based paths used in this
# notebook can be read and written. The same mount call already appears at
# the top of the notebook, so this cell repeats it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## **Business Recorder**

In [9]:
# Load the Business Recorder CSV into `dataframes`, keyed by the file name
# without its extension, then show a short preview of each loaded frame.
#
# Select by file name rather than by position: `files` is built from
# os.listdir, which returns entries in arbitrary order, so a positional slice
# such as files[0:1] is not guaranteed to pick this newspaper.
selected_files2 = [f for f in files if "business_recorder" in f.lower()]
print("Selected files:", selected_files2)

dataframes = {}

CHUNK_SIZE = 50000  # rows parsed per read_csv chunk

for file in selected_files2:
    file_path = os.path.join(data_path, file)
    df_name = os.path.splitext(file)[0]

    try:
        chunk_list = []

        # NOTE(review): the recorded output shows a warning raised from this
        # read_csv call for several chunks (possibly mixed column dtypes) —
        # confirm and pin `dtype=` if needed.
        for chunk in pd.read_csv(
            file_path,
            encoding="latin-1",
            index_col=False,
            chunksize=CHUNK_SIZE
        ):
            # Drop columns whose header starts with "Unnamed" (the name pandas
            # gives to columns that have a blank header in the CSV).
            chunk = chunk.loc[:, ~chunk.columns.str.contains('^Unnamed')]
            chunk_list.append(chunk)

        df = pd.concat(chunk_list, ignore_index=True)
        dataframes[df_name] = df

        print(f"Loaded {file} in chunks")

    except Exception as e:
        # Best-effort load: report the failure and continue with the next file.
        print(f"Error loading {file}: {e}")

for name, df in dataframes.items():
    print(f"\nDataFrame: {name}")
    display(df.head())
    print(f"Columns: {df.columns.tolist()}")


Selected files: ['business_recorder(2020-2023).csv']


  for chunk in pd.read_csv(
  for chunk in pd.read_csv(
  for chunk in pd.read_csv(
  for chunk in pd.read_csv(


Loaded business_recorder(2020-2023).csv in chunks

DataFrame: business_recorder(2020-2023)


Unnamed: 0,headline,date,link,source,categories,description
0,2019: A tough year it was indeed,1/1/2020,https://www.brecorder.com/news/557945/two-019-...,Business Recorder,Editorials,"[caption id=""attachment_524853"" align=""alignno..."
1,The matter of ECP appointments,1/1/2020,https://www.brecorder.com/news/557946/the-matt...,Business Recorder,Editorials,Delaying important decisions and making them d...
2,PTA plans to procure automated QoS monitoring ...,1/1/2020,https://www.brecorder.com/news/557922/pta-plan...,Business Recorder,Technology,Pakistan Telecommunication Authority (PTA) has...
3,"Number of 3G, 4G users reaches 74.33 million b...",1/1/2020,https://www.brecorder.com/news/557925/number-o...,Business Recorder,Technology,The number of 3G and 4G users in Pakistan reac...
4,China to cut reserve requirement for banks as ...,1/1/2020,https://www.brecorder.com/news/557995/china-to...,Business Recorder,Business & Finance,The central bank cut the requirement three tim...


Columns: ['headline', 'date', 'link', 'source', 'categories', 'description']


In [10]:
# Collect every distinct category label that appears in the loaded frames.
all_unique_categories = set()

for df_name, df in dataframes.items():
    if 'categories' not in df.columns:
        continue

    # The column is overwritten in place with its lowercase string form.
    # astype(str) turns missing values into the text 'nan', which is why
    # 'nan' is excluded explicitly below.
    df['categories'] = df['categories'].astype(str).str.lower()

    for categories_str in df['categories'].dropna().unique():
        # A cell may hold several comma-separated labels; trim each one.
        labels = (piece.strip() for piece in categories_str.split(','))
        all_unique_categories.update(
            label for label in labels if label and label != 'nan'
        )

print("Unique Categories:")
for category in sorted(all_unique_categories):
    print(f"- {category}")

# Target labels that the raw categories are reduced to.
broad_categories = [
    'Business', 'Politics', 'National', 'Sports', 'Opinion', 'Entertainment',
    'Technology', 'Health', 'Food', 'World', 'Other',
]

category_mapping = {
    'Business': 'Business', 'Pakistan, Business': 'Business', 'Business, World': 'Business',
    'Balochistan, Business': 'Business', 'Punjab, Business': 'Business', 'Business, Technology': 'Business',
    'Business, K-P': 'Business', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business', 'Pakistan, Business, Islamabad': 'Business',
    'GOVERNANCE': 'Politics', 'GOVERNANCE & HEADLINES': 'Politics',
    'GOVERNANCE & HEADLINES & Top Headlines': 'Politics', 'GOVERNANCE & NATIONAL': 'Politics',
    'GOVERNANCE & HEADLINES & NATIONAL': 'Politics', 'POLITICS': 'Politics',
    'POLITICS, Pakistan': 'Politics', 'POLITICS, World': 'Politics', 'Pakistan, POLITICS': 'Politics',
    'NATIONAL': 'National', 'ISLAMABAD': 'National', 'KARACHI': 'National', 'LAHORE': 'National',
    'PESHAWAR': 'National', 'MULTAN': 'National', 'CITY': 'National', 'Rawalpindi': 'National',
    'Pakistan': 'National', 'Punjab': 'National', 'Sindh': 'National', 'K-P': 'National',
    'Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Azad Jammu & Kashmir': 'National',
    'Sports': 'Sports', 'Sports & Top Headlines': 'Sports', 'Sports & World': 'Sports',
    'Sports, K-P': 'Sports', 'Sports, Pakistan': 'Sports', 'Sports, TV': 'Sports',
    'Sports, Hockey': 'Sports', 'Sports, Cricket': 'Sports', 'Football': 'Sports', 'Tennis': 'Sports',
    'Sports, Punjab': 'Sports', 'Sports, Life & Style': 'Sports', 'Sports, Multan, Cities': 'Sports',
    'Editorials': 'Opinion', 'Editorials & Letters & Opinion': 'Opinion', 'Comment': 'Opinion',
    'Comment & Opinion': 'Opinion', 'Letters': 'Opinion', 'Opinion': 'Opinion', 'Cartoon & Opinion': 'Opinion',
    'Comment & HEADLINES & Opinion': 'Opinion', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'Entertainment': 'Entertainment', 'Life & Style': 'Entertainment', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Music': 'Entertainment', 'Life & Style, TV': 'Entertainment', 'Life & Style, Gossip': 'Entertainment',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Food': 'Entertainment',
    'Life & Style, Art and Books': 'Entertainment', 'Life & Style, Theatre': 'Entertainment',
    'Life & Style, Spotlight': 'Entertainment', 'Life & Style, Bollywood': 'Entertainment',
    'Life & Style, K-P': 'Entertainment', 'Life & Style, Health': 'Entertainment',
    'Technology': 'Technology', 'World, Technology': 'Technology', 'Technology, Business': 'Technology',
    'Technology, Sports': 'Technology', 'Technology, Food': 'Technology', 'Technology, Games': 'Technology',
    'Technology, Life & Style': 'Technology', 'Technology, Pakistan': 'Technology',
    'Health': 'Health', 'Pakistan, Health': 'Health', 'Sindh, Health': 'Health',
    'Jammu & Kashmir, Health': 'Health', 'Health, ADVICE': 'Health', 'Food, ADVICE, Health': 'Health',
    'Health, Life & Style': 'Health', 'Health, Latest': 'Health', 'Food': 'Food', 'Food, Spotlight': 'Food',
    'Technology, Food': 'Food',
    'World': 'World', 'Pakistan, World': 'World', 'World, Jammu & Kashmir': 'World', 'World, Sports': 'World',
    'World, Gilgit Baltistan': 'World', 'World, Life & Style': 'World', 'World, Music': 'World',
    'World, Fashion': 'World', 'World, Health': 'World', 'World, K-P': 'World', 'World, archives': 'World',
    'World, Videos': 'World', 'World, Opinion': 'World', 'World, Food, Technology': 'World', 'World, Bollywood': 'World',
    'World, Newslab': 'World', 'Pakistan, World, Islamabad': 'World', 'Pakistan, Islamabad, World, Cities': 'World',
    'Pakistan, World, Balochistan': 'World', 'Pakistan, Azad Jammu & Kashmir, World': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World',

    'Uncategorized': 'Other', 'SPONSORED': 'Other', 'Sponsored Content': 'Other',
    'NATIVE CONTENT': 'Other', 'Archives': 'Other', 'Slideshows, World': 'Other', 'Latest': 'Other',
    'Multimedia': 'Other', 'T.Edit': 'Other', 'NATIVE CONTENT & Pakistan Today': 'Other',
    'HEADLINES': 'Other', 'HEADLINES & Top Headlines': 'Other', 'FEATURED & top Featured': 'Other',
    'FEATURED & Top Non Business': 'Other', 'FEATURED': 'Other', 'E-papers & Profit Magazine': 'Other',
    'E-papers & Pakistan Today': 'Other', 'Top Headlines': 'Other', 'NATIONAL & Top Non Business': 'Other',
    'Editorials & Opinion': 'Opinion', 'Letters & Opinion': 'Opinion', 'NATIONAL & Top Headlines': 'Other',
    'NATIONAL & World': 'World', 'Top Non Business & World': 'World', 'LAHORE & NATIONAL': 'National',
    'Analysis & HEADLINES': 'Other', 'HEADLINES & NATIONAL': 'Other', 'CITY & LAHORE': 'National',
    'HEADLINES & NATIONAL & Top Headlines': 'Other', 'Top Headlines & World': 'World', 'KARACHI & NATIONAL': 'National',
    'NATIONAL & PESHAWAR': 'National', 'HEADLINES & KARACHI': 'Other', 'HEADLINES & LAHORE': 'Other',
    'HEADLINES & PESHAWAR': 'Other', 'INTERVIEW & Top Headlines': 'Other', 'ISLAMABAD & NATIONAL': 'National',
    'CITY & HEADLINES & LAHORE': 'Other', 'HEADLINES & Sports': 'Sports', 'KARACHI & LAHORE & NATIONAL': 'National',
    'KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Non Business': 'Other', 'FEATURED & Top Headlines': 'Other',
    'NATIONAL & Sports': 'Sports', 'KARACHI & NATIONAL & Top Headlines': 'National', 'CITY & ISLAMABAD': 'National',
    'HEADLINES & Top Headlines & World': 'World', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'HEADLINES & LAHORE & NATIONAL': 'Other',
    'Analysis & HEADLINES & NATIONAL & Top Headlines': 'Other',
    'Book Review': 'Other', 'LAHORE & NATIONAL & Top Headlines': 'National',
    'HEADLINES & LAHORE & NATIONAL & Top Headlines': 'Other', 'CITY & KARACHI & NATIONAL': 'National',
    'LAHORE & NATIONAL & PESHAWAR': 'National', 'Comment & Editorials': 'Opinion',
    'HEADLINES & Sports & Top Headlines': 'Sports', 'Book Review & E-papers & Pakistan Today': 'Other',
    'CITY & HEADLINES & NATIONAL': 'Other', 'CITY & NATIONAL': 'National', 'FEATURED & NATIONAL': 'Other',
    'ISLAMABAD & KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Headlines & World': 'World',
    'LAHORE & Top Headlines': 'Other', 'HEADLINES & ISLAMABAD & NATIONAL': 'Other',
    'HEADLINES & NATIONAL & Sports & Top Headlines': 'Sports', 'NATIONAL & Sports & Top Headlines': 'Sports',
    'Analysis & E-papers & Pakistan Today': 'Other', 'CITY & HEADLINES': 'Other', 'Sports & World': 'Sports',
    'HEADLINES & ISLAMABAD & LAHORE & NATIONAL': 'Other', 'CITY & FEATURED & LAHORE': 'Other',
    'NATIONAL & Opinion': 'Opinion', 'Entertainment & World': 'Entertainment',
    'HEADLINES & ISLAMABAD & NATIONAL & Pakistan Today': 'Other',
    'NATIONAL & Sports & Top Headlines & Top Non Business': 'Sports', 'HEADLINES & KARACHI & NATIONAL': 'Other',
    'OIC & World': 'World', 'Agriculture & NATIONAL': 'National', 'NATIONAL & top Featured': 'Other',
    'CITY & Education & MULTAN': 'National', 'Cartoon & Letters': 'Opinion', 'FEATURED & HEADLINES & NATIONAL': 'Other',
    'FEATURED & Sports': 'Sports', 'Comment & Letters': 'Opinion', 'ISLAMABAD & SPONSORED': 'Other',
    'top Featured & World': 'World', 'HEADLINES & NATIONAL & top Featured': 'Other',
    'NATIONAL & Top Headlines & World': 'World', 'NATIONAL & top Featured & World': 'World',
    'MULTAN & NATIONAL': 'National', 'NATIONAL & top Featured & Top Headlines': 'Other',
    'NATIONAL & PESHAWAR & Top Headlines': 'National', 'CITY & PESHAWAR': 'National',
    'HEADLINES & Letters & Opinion': 'Opinion', 'CITY & LAHORE & Opinion': 'Opinion',
    'ISLAMABAD & Pakistan Today': 'Other', 'top Featured & Top Headlines': 'Other', 'top Featured': 'Other',
    'PESHAWAR & Sports': 'Sports', 'Sports & top Featured': 'Sports', 'E-papers': 'Other',
    'CITY & ISLAMABAD & NATIONAL': 'National', 'Comment & NATIONAL & Opinion': 'Opinion',
    'Book Review & NATIONAL': 'Other', 'Entertainment & NATIONAL': 'Entertainment', 'Analysis & NATIONAL': 'Other',
    'NATIONAL & Pakistan Today': 'Other', 'Cartoon & Editorials': 'Opinion', 'CITY & LAHORE & NATIONAL': 'National',
    'Education & NATIONAL': 'National', 'NATIVE CONTENT & Pakistan Today': 'Other',

    'Pakistan, Punjab': 'National', 'Pakistan, K-P': 'National', 'Life & Style, Film, Gossip': 'Entertainment',
    'Life & Style, TV': 'Entertainment', 'Sindh': 'National', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Gossip': 'Entertainment', 'Life & Style, Music': 'Entertainment', 'Punjab': 'National', 'K-P': 'National',
    'Editorial': 'Opinion', 'Balochistan': 'National', 'Sindh, Health': 'Health', 'Pakistan, Life & Style': 'Entertainment',
    'Jammu & Kashmir, Health': 'Health', 'Pakistan, Sindh': 'National', 'K-P, Music': 'Entertainment',
    'Life & Style': 'Entertainment', 'Pakistan, Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Jammu & Kashmir': 'National',
    'Life & Style, Art and Books, Music': 'Entertainment', 'Life & Style, Fashion, Gossip': 'Entertainment',
    'Life & Style, Music, Gossip': 'Entertainment', 'Pakistan, Jammu & Kashmir': 'National', 'Sindh, Punjab': 'National',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Film, TV': 'Entertainment', 'Balochistan, Business': 'Business',
    'Life & Style, Health': 'Health', 'Punjab, Business': 'Business', 'Music, Film': 'Entertainment', 'TV': 'Entertainment',
    'Life & Style, Music, Food': 'Entertainment', 'Pakistan, Health': 'Health', 'Balochistan, K-P': 'National',
    'Sindh, Technology': 'Technology', 'Film': 'Entertainment', 'Sindh, Life & Style, Music': 'Entertainment',
    'Life & Style, Gossip, TV': 'Entertainment', 'Life & Style, Art and Books': 'Entertainment', 'K-P, Technology': 'Technology',
    'Magazine': 'Other', 'Film, Gossip': 'Entertainment', 'Life & Style, Theatre': 'Entertainment', 'Business, Technology': 'Business',
    'Balochistan, Gilgit Baltistan': 'National', 'K-P, Health': 'Health', 'Pakistan, Gilgit Baltistan': 'National',
    'Life & Style, Film, Fashion': 'Entertainment', 'Fashion': 'Entertainment', 'Punjab, World': 'World',
    'Pakistan, Sports': 'Sports', 'Pakistan, Technology': 'Technology', 'Balochistan, Health': 'Health',
    'Pakistan, Sindh, Art and Books': 'Entertainment', 'Life & Style, Fashion, TV': 'Entertainment', 'TV, Theatre': 'Entertainment',
    'Life & Style, Food': 'Food', 'Pakistan, Film': 'Entertainment', 'Health': 'Health', 'World, K-P': 'World',
    'Life & Style, K-P': 'Entertainment', 'Art and Books': 'Entertainment', 'Opinion, Health': 'Opinion',
    'Life & Style, Music, TV': 'Entertainment', 'Sports, Life & Style': 'Sports', 'Sindh, Jammu & Kashmir': 'National',
    'Sindh, Business': 'Business', 'Life & Style, Film, Theatre': 'Entertainment', 'Sindh, Sports': 'Sports',
    'archives': 'Other', 'Music, Health': 'Health', 'Punjab, Technology': 'Technology', 'Punjab, Health': 'Health',
    'Life & Style, Food, Gossip': 'Entertainment', 'Life & Style, Food, Health': 'Health', 'Sports, K-P': 'Sports',
    'Sindh, Life & Style': 'Entertainment', 'Life & Style, Music, Theatre': 'Entertainment', 'Sports, Videos': 'Sports',
    'Opinion, Technology': 'Opinion', 'Sindh, World': 'World', 'Pakistan, K-P, Health': 'Health',
    'Pakistan, Sindh, Punjab': 'National', 'Life & Style, Opinion': 'Opinion', 'Balochistan, Life & Style': 'Entertainment',
    'Life & Style, Art and Books, Film': 'Entertainment', 'Pakistan, Opinion': 'Opinion', 'Punjab, Jammu & Kashmir': 'National',
    'Punjab, Sports': 'Sports', 'Life & Style, Technology': 'Technology', 'Gossip': 'Entertainment',
    'Life & Style, Music, Film': 'Entertainment', 'Life & Style, Health, TV': 'Health', 'Technology, Games': 'Technology',
    'Pakistan, World, Jammu & Kashmir': 'World', 'Life & Style, Music, Fashion': 'Entertainment',
    'Life & Style, Art and Books, Health': 'Health', 'Sindh, Videos': 'Other', 'Punjab, Food': 'Food',
    'Life & Style, Film, Health': 'Health', 'Sports, Multan, Cities': 'Sports', 'Music, Fashion': 'Entertainment',
    'Videos': 'Other', 'K-P, Art and Books': 'Entertainment', 'Music, Gossip': 'Entertainment',
    'Business, Jammu & Kashmir': 'Business', 'Sindh, Balochistan': 'National', 'Opinion, Editorial': 'Opinion',
    'Pakistan, archives': 'Other', 'Jammu & Kashmir, Gilgit Baltistan': 'National', 'Punjab, K-P': 'National',
    'Business, K-P': 'Business', 'Life & Style, Fashion, Health': 'Health', 'World, Azad Jammu & Kashmir': 'World',
    'Life &amp; Style, TV': 'Entertainment', 'Sindh, Business, Health': 'Business', 'Sports, Business': 'Sports',
    'Punjab, Business, Lahore, Cities': 'Business', 'Punjab, Life & Style, Gossip': 'Entertainment',
    'Life & Style, Art and Books, TV': 'Entertainment', 'Sindh, Karachi, Cities': 'National', 'Sindh, archives': 'Other',
    'Pakistan, Islamabad': 'National', 'life and style': 'Entertainment', 'life and style, Music': 'Entertainment',
    'Islamabad': 'National', 'Pakistan, Health, Food': 'Health', 'Pakistan, Khyber-Pakhtunkhwa': 'National',
    'Music, Film, Theatre': 'Entertainment', 'Music': 'Entertainment', 'Cricket': 'Sports',
    'Film, Gossip, Bollywood': 'Entertainment', 'Khyber-Pakhtunkhwa': 'National', 'Sindh, Karachi': 'National',
    'Khyber-Pakhtunkhwa, Pakistan': 'National', 'World, Azad Jammu & Kashmir, Cities': 'World',
    'Pakistan, Islamabad, Cities': 'National', 'Gossip, TV, Music': 'Entertainment',
    'Music, Pakistan, Life & Style': 'Entertainment', 'Bollywood, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir': 'National', 'Art and Books, Film, Games': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Cities': 'National', 'Sindh, Hyderabad': 'National', 'Sindh, Cities': 'National',
    'Pakistan, Sindh, Cities': 'National', 'Art and Books, Film': 'Entertainment', 'Pakistan, Lahore': 'National',
    'Sports, TV, Gossip': 'Sports', 'Punjab, Pakistan, Lahore': 'National', 'Games': 'Other',
    'Khyber-Pakhtunkhwa, Swat': 'National', 'Pakistan, Sindh, Karachi, Business': 'Business',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National', 'Football': 'Sports', 'Pakistan, Peshawar': 'National',
    'TV, Sports': 'Sports', 'Khyber-Pakhtunkhwa, Cities, Peshawar': 'National', 'Rawalpindi': 'National',
    'Slideshows, World': 'World', 'Pakistan, Azad Jammu & Kashmir, Cities': 'National',
    'Pakistan, Cities, Khyber Pakhtunkhwa': 'National', 'Gossip, Film': 'Entertainment', 'Health, ADVICE': 'Health',
    'TV, Film': 'Entertainment', 'Pakistan, Lahore, Cities': 'National', 'Pakistan, Gwadar, Cities': 'National',
    'Rawalpindi, Punjab': 'National', 'Punjab, Rawalpindi': 'National', 'Khyber-Pakhtunkhwa, Peshawar': 'National',
    'Punjab, Lahore': 'National', 'Pakistan, Balochistan, Cities': 'National', 'Sindh, khairpur': 'National',
    'Islamabad, Pakistan, Cities': 'National', 'Pakistan, Khyber-Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Karachi': 'National', 'Khyber-Pakhtunkhwa, Abbottabad': 'National', 'TV, Gossip': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Gilgit-Baltistan': 'National', 'World, Pakistan': 'World',
    'Khyber-Pakhtunkhwa, Cities': 'National', 'Fashion, Gossip': 'Entertainment', 'Islamabad, World': 'World',
    'Punjab, Multan': 'National', 'Punjab, Faisalabad': 'National', 'Pakistan, Islamabad, Sindh': 'National',
    'Pakistan, Karachi, Cities': 'National', 'Islamabad, Business': 'Business', 'Sindh, Pakistan': 'National',
    'Pakistan, Islamabad, Punjab': 'National', 'Pakistan, Sindh, Karachi, Cities': 'National',
    'Pakistan, Cities, Azad Jammu & Kashmir': 'National', 'Health, Life & Style, ADVICE': 'Health',
    'Islamabad, Balochistan': 'National', 'Pakistan, Gilgit-Baltistan': 'National', 'Pakistan, Punjab, Cities': 'National',
    'Sindh, tharparkar': 'National', 'Business, Gilgit-Baltistan': 'Business', 'Khyber Pakhtunkhwa': 'National',
    'Film, TV, Life & Style, life and style': 'Entertainment', 'TV, Film, Life & Style': 'Entertainment',
    'Film, Life & Style': 'Entertainment', 'Pakistan, Cities, Lahore': 'National',
    'Life & Style, Gossip, Film': 'Entertainment', 'Film, Art and Books': 'Entertainment', 'Food, ADVICE, Health': 'Health',
    'Gossip, Fashion': 'Entertainment', 'Gossip, Life & Style': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Mardan': 'National', 'Pakistan, Punjab, Lahore': 'National',
    'Life & Style, Gossip, Fashion': 'Entertainment', 'Pakistan, Cities': 'National',
    'Islamabad, Rawalpindi': 'National', 'Islamabad, Pakistan': 'National', 'Pakistan, Islamabad, World': 'World',
    'Pakistan, Sindh, Nawabshah': 'National', 'Balochistan, Islamabad, Pakistan': 'National', 'Fashion, Life & Style': 'Entertainment',
    'Health, ADVICE, Life & Style': 'Health', 'Music, Life & Style': 'Entertainment',
    'Pakistan, Peshawar, Islamabad, Cities': 'National', 'Punjab, Gilgit-Baltistan': 'National', 'Health, World': 'Health',
    'Gossip, Life & Style, Music': 'Entertainment', 'Gossip, Music': 'Entertainment', 'Gossip, Film, TV': 'Entertainment',
    'Technology, Life & Style': 'Technology', 'Sindh, Pakistan, Karachi, Cities': 'National',
    'Gossip, TV, Film, Life & Style': 'Entertainment', 'Pakistan, World, Islamabad, Cities': 'World',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'Sindh, Nawabshah': 'National', 'Azad Jammu & Kashmir, Pakistan': 'National',
    'TV, Gossip, Life & Style': 'Entertainment', 'Film, TV, Life & Style': 'Entertainment',
    'Khyber Pakhtunkhwa, Peshawar, Sports': 'Sports', 'Film, Gossip, Life & Style': 'Entertainment',
    'Rawalpindi, Islamabad': 'National', 'Sindh, sukkur': 'National', 'Pakistan, World, Islamabad': 'World',
    'Pakistan, Sindh, Karachi, Islamabad': 'National', 'Business, Pakistan': 'Business',
    'ADVICE, Life & Style, Film': 'Entertainment', 'Film, Life & Style, TV, Gossip': 'Entertainment', 'Music, TV': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Islamabad': 'National', 'Music, Gossip, Life & Style': 'Entertainment',
    'Islamabad, Cities, Pakistan': 'National', 'Islamabad, Punjab': 'National', 'Health, Life & Style': 'Health',
    'Technology, World': 'Technology', 'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Hyderabad': 'National', 'Life & Style, TV, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'ADVICE, Life & Style': 'Health', 'Pakistan, Sindh, Balochistan': 'National',
    'Gossip, Life & Style, TV': 'Entertainment', 'Music, Film, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Islamabad': 'National', 'Pakistan, World, Azad Jammu & Kashmir': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World', 'Rawalpindi, Pakistan, Business': 'Business',
    'Khyber Pakhtunkhwa, Peshawar': 'National', 'Sindh, dadu': 'National', 'Fashion, Gossip, Life & Style': 'Entertainment',
    'Pakistan, Karachi, Sindh': 'National', 'Pakistan, Balochistan, Quetta': 'National', 'TV, Life & Style': 'Entertainment',
    'Pakistan, Islamabad, World, Cities': 'World', 'Pakistan, Khyber Pakhtunkhwa, Cities': 'National',
    'Pakistan, Karachi, Sindh, Cities': 'National', 'Balochistan, Quetta': 'National', 'Pakistan, Rawalpindi': 'National',
    'Punjab, Film': 'Entertainment', 'Khyber Pakhtunkhwa, Swat': 'National', 'Balochistan, Gwadar': 'National',
    'TV, Life & Style, Gossip': 'Entertainment', 'Sindh, Karachi, Pakistan, Cities': 'National',
    'Pakistan, Sindh, Islamabad': 'National', 'Pakistan, Punjab, Islamabad, Cities': 'National',
    'Khyber Pakhtunkhwa, Nowshera': 'National', 'TV, Film, Gossip': 'Entertainment', 'Abbottabad': 'National',
    'Pakistan, Karachi, Peshawar': 'National', 'Pakistan, Khyber Pakhtunkhwa': 'National', 'Sports, Hockey': 'Sports',
    'Art and Books, Life & Style': 'Entertainment', 'Pakistan, World, Balochistan': 'World', 'Theatre': 'Entertainment',
    'Gossip, Film, Life & Style': 'Entertainment', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business',
    'Film, TV, Gossip, Life & Style': 'Entertainment', 'Life & Style, Sports': 'Sports',
    'Music, Art and Books, Life & Style, Film': 'Entertainment', 'Pakistan, Sindh, Karachi, Hyderabad, Cities': 'National',
    'Life & Style, ADVICE': 'Health',
    'Pakistan, Sindh, Punjab, Azad Jammu & Kashmir, Gilgit-Baltistan, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Karachi': 'National', 'Fashion, Music': 'Entertainment', 'Film, TV': 'Entertainment',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra, Nowshera, Charsadda': 'National', 'Fashion, Gossip, TV': 'Entertainment',
    'Karachi, Pakistan': 'National', 'Pakistan, Azad Jammu & Kashmir, Muzaffarabad': 'National',
    'Pakistan, Azad Jammu & Kashmir, World': 'World', 'Film, Music, Gossip': 'Entertainment',
    'Pakistan, Punjab, Rawalpindi': 'National', 'Pakistan, Lahore, Punjab': 'National', 'World, Newslab': 'World',
    'Karachi': 'National', 'Pakistan, Islamabad, Azad Jammu & Kashmir': 'National', 'Gossip, Fashion, Life & Style': 'Entertainment',
    'Pakistan, Rawalpindi, Karachi': 'National', 'Art and Books, Gossip, Life & Style': 'Entertainment',
    'Film, Theatre, Life & Style': 'Entertainment', 'Pakistan, Punjab, Islamabad': 'National',
    'Gilgit-Baltistan, Pakistan': 'National',
    'World, Business': 'Business',
    'Gilgit-Baltistan': 'National',
    'Ù¾Ø§Ú©Ø³Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',

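    # Mappings for the 'daily_times' dataset. Keys in this section are written
    # in lowercase and join multiple categories with ' & '; the '&amp;' inside
    # some keys is an HTML-escaped ampersand kept verbatim (presumably so the
    # key matches the raw category value — verify against the loaded data).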
    'arts, culture &amp; books': 'Entertainment',
    'arts, culture &amp; books & blog': 'Entertainment',
    'arts, culture &amp; books & culture': 'Entertainment',
    'arts, culture &amp; books & education': 'Entertainment',
    'arts, culture &amp; books & entertainment': 'Entertainment',
    'arts, culture &amp; books & entertainment & world': 'Entertainment',
    'arts, culture &amp; books & featured': 'Entertainment',
    'arts, culture &amp; books & featured & featured': 'Entertainment',
    'arts, culture &amp; books & health': 'Entertainment',
    'arts, culture &amp; books & lifestyle': 'Entertainment',
    'arts, culture &amp; books & lifestyle & pakistan': 'Entertainment',
    'arts, culture &amp; books & pakistan': 'Entertainment',
    'arts, culture &amp; books & perspectives': 'Entertainment',
    'arts, culture &amp; books & top stories': 'Entertainment',
    'arts, culture &amp; books & trending': 'Entertainment',
    'arts, culture &amp; books & world': 'Entertainment',
    'balochistan & business': 'Business',
    'balochistan & business & pakistan & pakistan': 'Business',
    'balochistan & business & pakistan & pakistan & top stories': 'Business',
    'balochistan & business & pakistan & top stories': 'Business',
    'balochistan & education': 'National',
    'balochistan & health': 'Health',
    'balochistan & health & pakistan': 'Health',
    'balochistan & islamabad & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & islamabad & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & islamabad & pakistan & pakistan': 'National',
    'balochistan & islamabad & punjab & sindh': 'National',
    'balochistan & khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & khyber pakhtunkhwa & pakistan & punjab & sindh & top stories': 'National',
    'balochistan & pakistan': 'National',
    'balochistan & pakistan & pakistan': 'National',
    'balochistan & pakistan & pakistan & top stories': 'National',
    'balochistan & pakistan & sindh': 'National',
    'balochistan & pakistan & top stories': 'National',
    'balochistan & reviews': 'National',
    'balochistan & sindh': 'National',
    'balochistan & sports': 'Sports',
    'balochistan & top stories': 'National',
    'blog': 'Other',
    'blog & blogs': 'Other',
    'blog & lifestyle & pakistan': 'Entertainment',
    'blogs': 'Other',
    'blogs & culture': 'Other',
    'blogs & featured': 'Other',
    'blogs & gilgit baltistan & sports': 'Sports',
    'blogs & lifestyle': 'Other',
    'blogs & lifestyle & uncategorized': 'Other',
    'blogs & pakistan': 'Other',
    'blogs & perspectives': 'Other',
    'blogs & sports': 'Sports',
    'blogs & trending': 'Other',
    'business & education & pakistan & trending': 'Business',
    'business & featured & pakistan': 'Business',
    'business & finance': 'Business',
    'business & finance & international': 'Business',
    'business & finance & international & pakistan': 'Business',
    'business & finance & pakistan': 'Business',
    'business & finance & pakistan & top stories': 'Business',
    'business & finance & pakistan & uncategorized': 'Business',
    'business & finance & top stories': 'Business',
    'business & health & pakistan': 'Business',
    'business & international': 'Business',
    'business & international & pakistan': 'Business',
    'business & islamabad': 'Business',
    'business & islamabad & pakistan': 'Business',
    'business & islamabad & pakistan & pakistan': 'Business',
    'business & islamabad & pakistan & pakistan & top stories': 'Business',
    'business & khyber pakhtunkhwa': 'Business',
    'business & khyber pakhtunkhwa & pakistan': 'Business',
    'business & lahore & pakistan & punjab': 'Business',
    'business & lifestyle & pakistan': 'Business',
    'business & pakistan': 'Business',
    'business & pakistan & pakistan': 'Business',
    'business & pakistan & pakistan & punjab': 'Business',
    'business & pakistan & pakistan & punjab & top stories': 'Business',
    'business & pakistan & pakistan & sindh': 'Business',
    'business & pakistan & pakistan & sindh & top stories': 'Business',
    'business & pakistan & pakistan & top stories': 'Business',
    'business & pakistan & punjab': 'Business',
    'business & pakistan & punjab & top stories': 'Business',
    'business & pakistan & science and technology': 'Business',
    'business & pakistan & sindh': 'Business',
    'business & pakistan & sindh & top stories': 'Business',
    'business & pakistan & sports': 'Business',
    'business & pakistan & top stories': 'Business',
    'business & pakistan & trending & world': 'Business',
    'business & pakistan & uncategorized': 'Business',
    'business & pakistan & world': 'Business',
    'business & perspectives': 'Business',
    'business & punjab': 'Business',
    'business & reviews': 'Business',
    'business & science and technology & tgif & world': 'Business',
    'business & sindh': 'Business',
    'business & sindh & top stories': 'Business',
    'business & sponsored content': 'Business',
    'business & sports': 'Business',
    'business & top stories': 'Business',
    'business & top stories & trending & uncategorized & world': 'Business',
    'business & top stories & trending & world': 'Business',
    'business & top stories & world': 'Business',
    'business & trending': 'Business',
    'business & trending & world': 'Business',
    'business & uncategorized': 'Business',
    'business & world': 'Business',
    'cartoons': 'Opinion',
    'celebrity interviews & featured & lifestyle': 'Entertainment',
    'celebrity interviews & international & lifestyle': 'Entertainment',
    'celebrity interviews & lifestyle': 'Entertainment',
    'celebrity interviews & lifestyle & pakistan': 'Entertainment',
    'commentary / insight': 'Opinion',
    'commentary / insight & pakistan': 'Opinion',
    'commentary / insight & trending': 'Opinion',
    'culture': 'Entertainment',
    'culture & infotainment & pakistan': 'Entertainment',
    'culture & pakistan': 'Entertainment',
    'culture & world': 'Entertainment',
    'education': 'Other',
    'education & international': 'Other',
    'education & international & world': 'Other',
    'education & lahore & pakistan': 'National',
    'education & pakistan': 'National',
    'education & pakistan & punjab': 'National',
    'education & pakistan & top stories': 'National',
    'education & pakistan & trending & world': 'National',
    'education & punjab': 'National',
    'education & top stories': 'Other',
    'entertainment & fashion & lifestyle': 'Entertainment',
    'entertainment & film and drama reviews': 'Entertainment',
    'entertainment & gossip & lifestyle': 'Entertainment',
    'entertainment & infotainment': 'Entertainment',
    'entertainment & infotainment & international': 'Entertainment',
    'entertainment & infotainment & international & science and technology & social mania & world': 'Entertainment',
    'entertainment & international': 'Entertainment',
    'entertainment & international & lifestyle': 'Entertainment',
    'entertainment & karachi & pakistan & sports & top stories': 'Entertainment',
    'entertainment & karachi & sindh': 'Entertainment',
    'entertainment & lifestyle': 'Entertainment',
    'entertainment & lifestyle & lifestyle': 'Entertainment',
    'entertainment & lifestyle & movies': 'Entertainment',
    'entertainment & lifestyle & music': 'Entertainment',
    'entertainment & lifestyle & pakistan': 'Entertainment',
    'entertainment & lifestyle & trending': 'Entertainment',
    'entertainment & lifestyle & tv': 'Entertainment',
    'entertainment & lifestyle & uncategorized': 'Entertainment',
    'entertainment & lifestyle & world': 'Entertainment',
    'entertainment & music': 'Entertainment',
    'entertainment & music & uncategorized': 'Entertainment',
    'entertainment & pakistan': 'Entertainment',
    'entertainment & pakistan & top stories': 'Entertainment',
    'entertainment & pakistan & trending': 'Entertainment',
    'entertainment & science and technology & top stories': 'Entertainment',
    'entertainment & top stories': 'Entertainment',
    'entertainment & trending': 'Entertainment',
    'entertainment & trending & world': 'Entertainment',
    'entertainment & tv': 'Entertainment',
    'fashion & featured & gossip & lifestyle & trending': 'Entertainment',
    'fashion & gilgit baltistan': 'Entertainment',
    'fashion & international & lifestyle': 'Entertainment',
    'fashion & lifestyle': 'Entertainment',
    'fashion & lifestyle & pakistan': 'Entertainment',
    'fashion & lifestyle & trending': 'Entertainment',
    'featured & gilgit baltistan & pakistan & pakistan & trending': 'Other',
    'featured & international & lifestyle': 'Other',
    'featured & lahore': 'National',
    'featured & lifestyle': 'Other',
    'featured & op-ed': 'Other',
    'featured & pakistan': 'National',
    'featured & pakistan & punjab': 'National',
    'featured & pakistan & top stories': 'National',
    'featured & pakistan & trending & uncategorized': 'Other',
    'featured & pakistan & world': 'World',
    'featured & uncategorized': 'Other',
    'featured & world': 'World',
    'film and drama reviews': 'Entertainment',
    'film and drama reviews & international & lifestyle': 'Entertainment',
    'finance': 'Business',
    'finance & pakistan': 'Business',
    'gilgit baltistan & pakistan': 'National',
    'gilgit baltistan & pakistan & punjab': 'National',
    'gilgit baltistan & pakistan & sports': 'Sports',
    'gilgit baltistan & pakistan & top stories': 'National',
    'gilgit baltistan & top stories': 'National',
    'gossip & international & lifestyle & trending': 'Entertainment',
    'gossip & lifestyle': 'Entertainment',
    'gossip & lifestyle & trending': 'Entertainment',
    'health & international': 'Health',
    'health & islamabad': 'Health',
    'health & islamabad & pakistan & pakistan': 'Health',
    'health & islamabad & pakistan & top stories': 'Health',
    'health & khyber pakhtunkhwa': 'Health',
    'health & khyber pakhtunkhwa & pakistan': 'Health',
    'health & lahore & pakistan & punjab & top stories': 'Health',
    'health & lahore & pakistan & top stories': 'Health',
    'health & lahore & top stories': 'Health',
    'health & lifestyle': 'Health',
    'health & pakistan': 'Health',
    'health & pakistan & punjab': 'Health',
    'health & pakistan & punjab & top stories': 'Health',
    'health & pakistan & sindh': 'Health',
    'health & pakistan & top stories': 'Health',
    'health & pakistan & world': 'World',
    'health & punjab': 'Health',
    'health & social mania': 'Health',
    'health & sports & top stories': 'Sports',
    'health & top stories': 'Health',
    'health & top stories & world': 'World',
    'health & world': 'World',
    'infotainment': 'Entertainment',
    'infotainment & international': 'Entertainment',
    'infotainment & international & world': 'Entertainment',
    'infotainment & lahore & pakistan': 'Entertainment',
    'infotainment & lifestyle': 'Entertainment',
    'infotainment & lifestyle & world': 'Entertainment',
    'infotainment & pakistan': 'Entertainment',
    'infotainment & science and technology': 'Technology',
    'infotainment & top stories': 'Entertainment',
    'infotainment & trending': 'Entertainment',
    'infotainment & world': 'Entertainment',
    'international': 'World',
    'international & islamabad & pakistan': 'World',
    'international & karachi & lifestyle': 'Entertainment',
    'international & kashmir': 'World',
    'international & lifestyle': 'Entertainment',
    'international & lifestyle & movies': 'Entertainment',
    'international & lifestyle & pakistan': 'Entertainment',
    'international & movies': 'Entertainment',
    'international & pakistan': 'World',
    'international & pakistan & top stories': 'World',
    'international & pakistan & world': 'World',
    'international & science and technology': 'Technology',
    'international & science and technology & world': 'Technology',
    'international & sports': 'Sports',
    'international & top stories': 'World',
    'international & top stories & world': 'World',
    'international & travel': 'World',
    'international & world': 'World',
    'international & world & world': 'World',
    'islamabad & kashmir & top stories': 'National',
    'islamabad & lahore & pakistan': 'National',
    'islamabad & lahore & pakistan & sports': 'Sports',
    'islamabad & lahore & pakistan & sports & top stories': 'Sports',
    'islamabad & lahore & pakistan & top stories': 'National',
    'islamabad & lahore & sports & top stories': 'Sports',
    'islamabad & lifestyle': 'Other',
    'islamabad & lifestyle & pakistan': 'Other',
    'islamabad & pakistan': 'National',
    'islamabad & pakistan & pakistan': 'National',
    'islamabad & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'islamabad & pakistan & pakistan & punjab & top stories': 'National',
    'islamabad & pakistan & pakistan & sindh & top stories': 'National',
    'islamabad & pakistan & pakistan & top stories': 'National',
    'islamabad & pakistan & punjab': 'National',
    'islamabad & pakistan & punjab & top stories': 'National',
    'islamabad & pakistan & sindh': 'National',
    'islamabad & pakistan & sports': 'Sports',
    'islamabad & pakistan & top stories': 'National',
    'islamabad & pakistan & trending': 'National',
    'islamabad & pakistan & world': 'World',
    'islamabad & sports': 'Sports',
    'islamabad & top stories': 'National',
    'karachi & pakistan': 'National',
    'karachi & pakistan & sindh': 'National',
    'karachi & pakistan & sindh & sports': 'Sports',
    'karachi & pakistan & sindh & top stories': 'National',
    'karachi & pakistan & top stories': 'National',
    'karachi & sindh': 'National',
    'karachi & sindh & top stories': 'National',
    'kashmir': 'National',
    'kashmir & pakistan': 'National',
    'kashmir & pakistan & top stories': 'National',
    'kashmir & pakistan & uncategorized': 'National',
    'kashmir & pakistan & world': 'World',
    'kashmir & top stories': 'National',
    'kashmir & top stories & world': 'World',
    'khyber pakhtunkhwa & pakistan': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & punjab & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & pakistan & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & punjab': 'National',
    'khyber pakhtunkhwa & pakistan & sindh': 'National',
    'khyber pakhtunkhwa & pakistan & sindh & top stories': 'National',
    'khyber pakhtunkhwa & pakistan & top stories': 'National',
    'khyber pakhtunkhwa & top stories': 'National',
    'lahore & pakistan': 'National',
    'lahore & pakistan & pakistan': 'National',
    'lahore & pakistan & punjab': 'National',
    'lahore & pakistan & punjab & top stories': 'National',
    'lahore & pakistan & sports': 'Sports',
    'lahore & pakistan & sports & top stories': 'Sports',
    'lahore & pakistan & top stories': 'National',
    'lahore & punjab': 'National',
    'lahore & punjab & top stories': 'National',
    'lahore & top stories': 'National',
    'lifestyle': 'Entertainment',
    'lifestyle & lifestyle': 'Entertainment',
    'lifestyle & movies': 'Entertainment',
    'lifestyle & movies & music & trending': 'Entertainment',
    'lifestyle & movies & pakistan': 'Entertainment',
    'lifestyle & movies & trending': 'Entertainment',
    'lifestyle & music': 'Entertainment',
    'lifestyle & music & tv': 'Entertainment',
    'lifestyle & pakistan': 'Entertainment',
    'lifestyle & pakistan & top stories': 'Entertainment',
    'lifestyle & pakistan & trending': 'Entertainment',
    'lifestyle & reviews': 'Entertainment',
    'lifestyle & science and technology & sports': 'Entertainment',
    'lifestyle & sports': 'Sports',
    'lifestyle & top stories': 'Entertainment',
    'lifestyle & top stories & uncategorized': 'Entertainment',
    'lifestyle & trending': 'Entertainment',
    'lifestyle & trending & tv': 'Entertainment',
    'lifestyle & tv': 'Entertainment',
    'lifestyle & uncategorized': 'Entertainment',
    'lifestyle & world': 'Entertainment',
    'music': 'Entertainment',
    'off-beat': 'Other',
    'off-beat & pakistan': 'Other',
    'off-beat & top stories': 'Other',
    'off-beat & top stories & world': 'Other',
    'off-beat & uncategorized': 'Other',
    'off-beat & world': 'Other',
    'op-ed': 'Opinion',
    'op-ed & pakistan': 'Opinion',
    'pakistan & pakistan': 'National',
    'pakistan & pakistan & punjab': 'National',
    'pakistan & pakistan & punjab & top stories': 'National',
    'pakistan & pakistan & sindh': 'National',
    'pakistan & pakistan & sindh & top stories': 'National',
    'pakistan & pakistan & sindh & top stories & trending': 'National',
    'pakistan & pakistan & sports': 'Sports',
    'pakistan & pakistan & top stories': 'National',
    'pakistan & pakistan & top stories & world': 'World',
    'pakistan & pakistan & uncategorized': 'Other',
    'pakistan & pakistan & world': 'World',
    'pakistan & pakistan & world & world': 'World',
    'pakistan & perspectives': 'Other',
    'pakistan & punjab': 'National',
    'pakistan & punjab & sindh': 'National',
    'pakistan & punjab & sindh & top stories': 'National',
    'pakistan & punjab & top stories': 'National',
    'pakistan & punjab & trending': 'National',
    'pakistan & punjab & world': 'World',
    'pakistan & reviews': 'Other',
    'pakistan & science and technology': 'Technology',
    'pakistan & sindh': 'National',
    'pakistan & sindh & sports': 'Sports',
    'pakistan & sindh & top stories': 'National',
    'pakistan & sindh & top stories & trending': 'National',
    'pakistan & sindh & trending': 'National',
    'pakistan & sindh & uncategorized': 'Other',
    'pakistan & sponsored content': 'Other',
    'pakistan & sports': 'Sports',
    'pakistan & sports & sports': 'Sports',
    'pakistan & sports & top stories': 'Sports',
    'pakistan & sports & uncategorized': 'Other',
    'pakistan & sports & world': 'World',
    'pakistan & top stories': 'National',
    'pakistan & top stories & travel': 'Other',
    'pakistan & top stories & trending': 'National',
    'pakistan & top stories & uncategorized': 'Other',
    'pakistan & top stories & world': 'World',
    'pakistan & travel': 'Other',
    'pakistan & trending': 'National',
    'pakistan & trending & trending': 'National',
    'pakistan & trending & trending & world': 'World',
    'pakistan & trending & uncategorized': 'Other',
    'pakistan & uncategorized': 'Other',
    'pakistan & uncategorized & world': 'World',
    'pakistan & world': 'World',
    'pakistan & world & world': 'World',
    'perspectives': 'Other',
    'perspectives & world': 'World',
    'punjab & sindh': 'National',
    'punjab & top stories': 'National',
    'punjab & trending': 'National',
    'punjab & uncategorized': 'Other',
    'region': 'National',
    'reviews': 'Other',
    'reviews & sports': 'Sports',
    'reviews & trending': 'Other',
    'sci-tec & world': 'Technology',
    'science and technology': 'Technology',
    'science and technology & top stories': 'Technology',
    'science and technology & trending': 'Technology',
    'science and technology & world': 'Technology',
    'sindh & sports': 'Sports',
    'sindh & top stories': 'National',
    'sindh & trending': 'National',
    'social mania & tgif & trending & world': 'Other',
    'sponsored content & trending': 'Other',
    'sports & sports': 'Sports',
    'sports & sports & uncategorized': 'Sports',
    'sports & top stories': 'Sports',
    'sports & top stories & trending': 'Sports',
    'sports & top stories & world': 'World',
    'sports & trending': 'Sports',
    'sports & uncategorized': 'Sports',
    'top stories': 'Other',
    'top stories & travel': 'Other',
    'top stories & trending & trending & world': 'World',
    'top stories & trending & world': 'World',
    'top stories & uncategorized': 'Other',
    'top stories & uncategorized & world': 'Other',
    'top stories & world': 'World',
    'top stories & world & world': 'World',
    'travel': 'Other',
    'travel & world': 'World',
    'trending': 'Other',
    'trending & trending & world': 'World',
    'trending & world': 'World',
    'uncategorized & world': 'Other',
    'world & world': 'World',
    'ânot yet provided sufficient scientific and factual grounds for its behavior': 'Other',

    # New mappings for 'business_recorder(2020-2023)'
    'editorials ': 'Opinion',
    'technology ': 'Technology',
    'business & finance ': 'Business',
    'pakistan ': 'National',
    'sports ': 'Sports',
    'world ': 'World',
    'markets ': 'Business',
    'br research ': 'Business',
    'life & style ': 'Entertainment',
    'opinion ': 'Opinion',
    'print ': 'Other',
    'perspectives ': 'Opinion',
    'top news ': 'Other',
    'top stories ': 'Other',
    'rates ': 'Business',
    'brief recordings ': 'Other',
    'epaper ': 'Other',
    'budget ': 'Business',
    'supplements ': 'Other',
    'weather ': 'Other',
    '000 square feet for apartments.2 services provided for personal care by beauty 9810.0000': 'Other',
    '1969 (iv of 1969).52 goods imported by or donated to hospitals run by the federal government or a provincial government; and non-profit making educational and research institutions subject to the similar restrictions': 'Other',
    '1979 (xviii of 1979)': 'Other',
    '2021 made part of finance billthe below-referred significant changes made through the tax laws (amendment) ordinance': 'Other',
    '2021 notified by the board with such conditions': 'Other',
    '337.75 10': 'Other',
    '829 billion. in part ii of my speech': 'Other',
    'as a general rule': 'Other',
    'but thereâ\x80\x99s a reason this loss is happening': 'Other',
    'categories': 'Other',
    'ehl e tashi': 'Other',
    'natural gas and crude oil declined by 5.97': 'Other',
    'that\'s going to run the project which is to be started. i hope that it should be done in two to three months. the innovation over here is rather than the government running the zone ourselves': 'Other',
    'we ran these randomised control trials that covered around 75': 'Other',
    'nan': 'Other'
}

# Normalise the mapping keys exactly the way the 'categories' column is
# normalised below (lower-cased, surrounding whitespace stripped). Several keys
# in category_mapping carry a trailing space (e.g. 'editorials ', 'markets ');
# without the strip such a key can never equal a stripped column value, so it
# only "works" if an already-stripped duplicate key happens to exist.
# When two keys collapse to the same normalised form, the entry defined later
# in category_mapping wins.
lowercase_category_mapping = {
    k.lower().strip(): v for k, v in category_mapping.items()
}

for name, df in dataframes.items():

    if 'categories' not in df.columns:
        # Nothing to map: label every row 'Other' so that code reading
        # 'mapped_categories' does not fail with a KeyError.
        print(f"'{name}' has no 'categories' column; all rows labelled 'Other'")
        df['mapped_categories'] = 'Other'
        continue

    # astype(str) turns missing values into the literal string 'nan',
    # which the mapping sends to 'Other'.
    df['categories'] = df['categories'].astype(str).str.lower().str.strip()

    # Look up every normalised category; values without a mapping entry come
    # back as NaN and are labelled 'Other'.
    df['mapped_categories'] = (
        df['categories'].map(lowercase_category_mapping).fillna('Other')
    )

# Sanity report: which broad labels were produced, and confirmation that no
# NaN survived the fill above.
for name, df in dataframes.items():
    unique_mapped_categories = df['mapped_categories'].unique()
    print(f"\nUnique mapped categories in {name}:")
    print(unique_mapped_categories)
    nan_count = df['mapped_categories'].isnull().sum()
    print(f"Number of NaN values in 'mapped_categories' in {name}: {nan_count}")

Unique Categories:
- 000 square feet for apartments.2                services provided for personal care by beauty                 9810.0000
- 1969 (iv of 1969).52                                    goods imported by or donated to hospitals run                                      by the federal government or a provincial                                      government; and non-profit making educational                                      and research institutions subject to the                                      similar restrictions
- 1979                                (xviii of 1979)
- 2021 made part of finance billthe below-referred significant changes made through the tax laws (amendment) ordinance
- 2021 notified by the board                      with such conditions
- 337.75           10
- 829 billion. in part ii of my speech
- as a general rule
- asia cup
- br research
- brief recordings
- budget
- budget 2021-22
- budget 2022-23
- business
- business & finance
- but thereâ

In [None]:
# Keep only the rows labelled 'National' or 'World' and write them to Drive.
filtered_dataframes = {}

output_file_path = "/content/drive/MyDrive/project dataset/business_recorder.csv"

# to_csv does not create missing folders, so make sure the target folder exists.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# With a single loaded frame the file is written to output_file_path unchanged.
# If several frames are loaded, each gets its own file (frame name appended)
# so that later iterations do not silently overwrite earlier ones.
path_root, path_ext = os.path.splitext(output_file_path)
write_single_file = len(dataframes) <= 1

for name, df in dataframes.items():
    # Filter for 'National' or 'World'; .copy() detaches the result from df so
    # later edits to the filtered frame do not trigger SettingWithCopyWarning.
    filtered_df = df[df['mapped_categories'].isin(['National', 'World'])].copy()

    filtered_dataframes[name + '_filtered'] = filtered_df

    if write_single_file:
        target_path = output_file_path
    else:
        target_path = f"{path_root}_{name}{path_ext}"

    # Save directly to Google Drive
    filtered_df.to_csv(target_path, index=False)

    print(f"Filtered data saved to Drive at: {target_path}")
    print(f"\nFiltered DataFrame: {name}_filtered")
    display(filtered_df.head())
    print(f"Shape of filtered DataFrame: {filtered_df.shape}")

## **Tribune**

In [12]:
# Pick the Tribune CSV by name rather than by position: os.listdir() returns
# entries in arbitrary order, so files[1:2] is not guaranteed to be the Tribune
# file on every run. The positional slice is kept as a fallback when no
# filename matches.
tribune_matches = [f for f in files if 'tribune' in f.lower()]
selected_files4 = tribune_matches[:1] or files[1:2]
print("Selected files:", selected_files4)

dataframes = {}

for file in selected_files4:
    file_path = os.path.join(data_path, file)
    df_name = os.path.splitext(file)[0]
    try:
        # load CSV
        # NOTE(review): the preview output of this cell shows garbled text
        # (e.g. "Australiaâs"), which suggests the file may be UTF-8 decoded
        # as latin-1. Confirm before changing the encoding: category mapping
        # keys elsewhere in the notebook may rely on the current decoding.
        df = pd.read_csv(file_path, encoding="latin-1", index_col=False)

        # remove unnamed columns (e.g. a saved index column such as 'Unnamed: 0')
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # store cleaned dataframe under the file name without its extension
        dataframes[df_name] = df
        print(f"Loaded {file} (cleaned unnamed columns)")
    except Exception as e:
        # Best effort: report the failure and continue with the remaining files.
        print(f"Error loading {file}: {e}")

# display summary
for name, df in dataframes.items():
    print(f"\nDataFrame: {name}")
    display(df.head())
    print(f"Columns: {df.columns.tolist()}")


Selected files: ['tribune(full-data).csv']
Loaded tribune(full-data).csv (cleaned unnamed columns)

DataFrame: tribune(full-data)


Unnamed: 0,headline,date,link,source,categories,description
0,Shehbaz expected to return in 2nd week of Jan:...,1/1/2020,https://tribune.com.pk/story/2128765/shehbaz-e...,Tribune,"Pakistan, Punjab",LAHORE:PML-N President Shehbaz Sharif will ret...
1,US embassy in Baghdad 'suspends consular opera...,1/1/2020,https://tribune.com.pk/story/2128759/us-embass...,Tribune,World,BAGHDAD:The US Embassy in Baghdad said on Wedn...
2,"After seven years of hiatus, New Year festivit...",1/1/2020,https://tribune.com.pk/story/2128748/seven-yea...,Tribune,"Pakistan, K-P",PESHAWAR:As the sun rose high on the first day...
3,WATCH: Angry Pope slaps hand of woman who refu...,1/1/2020,https://tribune.com.pk/story/2128745/watch-ang...,Tribune,World,VATICAN CITY:Pope Francis apologised on Wednes...
4,Australia coach hoping for rain for third Test...,1/1/2020,https://tribune.com.pk/story/2128738/australia...,Tribune,Sports,Australiaâs coach Justin Langer found himsel...


Columns: ['headline', 'date', 'link', 'source', 'categories', 'description']


In [13]:
# Collect every distinct category token that appears in the loaded frames.
all_unique_categories = set()

for df_name, df in dataframes.items():
    if 'categories' not in df.columns:
        continue

    # Lower-case the column in place. astype(str) converts missing values to
    # the string 'nan', which is filtered out token by token below.
    df['categories'] = df['categories'].astype(str).str.lower()

    distinct_values = df['categories'].dropna().unique()
    for raw_value in distinct_values:
        # A single cell may hold several comma-separated categories.
        tokens = [piece.strip() for piece in raw_value.split(',')]
        all_unique_categories.update(
            token for token in tokens if token and token != 'nan'
        )

print("Unique Categories:")
for category in sorted(all_unique_categories):
    print(f"- {category}")

Unique Categories:
- abbottabad
- advice
- archives
- art and books
- azad jammu & kashmir
- badin
- balochistan
- bannu
- bollywood
- business
- charsadda
- cities
- cricket
- dadu
- dera ismail khan
- derabugti
- editorial
- faisalabad
- fashion
- film
- food
- football
- games
- gilgit
- gilgit baltistan
- gilgit-baltistan
- gossip
- gujranwala
- gwadar
- health
- hockey
- hyderabad
- islamabad
- jacobabad
- jammu & kashmir
- k-p
- karachi
- khairpur
- khyber pakhtunkhwa
- khyber-pakhtunkhwa
- lahore
- larkana
- latest
- life & style
- life &amp; style
- life and style
- magazine
- mansehra
- mardan
- mirpur
- movies
- multan
- multimedia
- music
- muzaffarabad
- nawabshah
- newslab
- nowshera
- opinion
- pakistan
- peshawar
- politics
- punjab
- quetta
- rawalpindi
- sanghar
- sindh
- skardu
- slideshows
- sports
- spotlight
- sukkur
- swat
- t.edit
- technology
- tennis
- tharparkar
- thatta
- theatre
- trends
- tv
- videos
- world
- zhob
- ù¾ø§ú©ø³øªø§ù


In [14]:
# Broad labels that the fine-grained source categories are grouped into.
broad_categories = [
    'Business',
    'Politics',
    'National',
    'Sports',
    'Opinion',
    'Entertainment',
    'Technology',
    'Health',
    'Food',
    'World',
    'Other',
]

category_mapping = {
    'Business': 'Business', 'Pakistan, Business': 'Business', 'Business, World': 'Business',
    'Balochistan, Business': 'Business', 'Punjab, Business': 'Business', 'Business, Technology': 'Business',
    'Business, K-P': 'Business', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business', 'Pakistan, Business, Islamabad': 'Business',
    'GOVERNANCE': 'Politics', 'GOVERNANCE & HEADLINES': 'Politics',
    'GOVERNANCE & HEADLINES & Top Headlines': 'Politics', 'GOVERNANCE & NATIONAL': 'Politics',
    'GOVERNANCE & HEADLINES & NATIONAL': 'Politics', 'POLITICS': 'Politics',
    'POLITICS, Pakistan': 'Politics', 'POLITICS, World': 'Politics', 'Pakistan, POLITICS': 'Politics',
    'NATIONAL': 'National', 'ISLAMABAD': 'National', 'KARACHI': 'National', 'LAHORE': 'National',
    'PESHAWAR': 'National', 'MULTAN': 'National', 'CITY': 'National', 'Rawalpindi': 'National',
    'Pakistan': 'National', 'Punjab': 'National', 'Sindh': 'National', 'K-P': 'National',
    'Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Azad Jammu & Kashmir': 'National',
    'Sports': 'Sports', 'Sports & Top Headlines': 'Sports', 'Sports & World': 'Sports',
    'Sports, K-P': 'Sports', 'Sports, Pakistan': 'Sports', 'Sports, TV': 'Sports',
    'Sports, Hockey': 'Sports', 'Sports, Cricket': 'Sports', 'Football': 'Sports', 'Tennis': 'Sports',
    'Sports, Punjab': 'Sports', 'Sports, Life & Style': 'Sports', 'Sports, Multan, Cities': 'Sports',
    'Editorials': 'Opinion', 'Editorials & Letters & Opinion': 'Opinion', 'Comment': 'Opinion',
    'Comment & Opinion': 'Opinion', 'Letters': 'Opinion', 'Opinion': 'Opinion', 'Cartoon & Opinion': 'Opinion',
    'Comment & HEADLINES & Opinion': 'Opinion', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'Entertainment': 'Entertainment', 'Life & Style': 'Entertainment', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Music': 'Entertainment', 'Life & Style, TV': 'Entertainment', 'Life & Style, Gossip': 'Entertainment',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Food': 'Entertainment',
    'Life & Style, Art and Books': 'Entertainment', 'Life & Style, Theatre': 'Entertainment',
    'Life & Style, Spotlight': 'Entertainment', 'Life & Style, Bollywood': 'Entertainment',
    'Life & Style, K-P': 'Entertainment', 'Life & Style, Health': 'Entertainment',
    'Technology': 'Technology', 'World, Technology': 'Technology', 'Technology, Business': 'Technology',
    'Technology, Sports': 'Technology', 'Technology, Food': 'Technology', 'Technology, Games': 'Technology',
    'Technology, Life & Style': 'Technology', 'Technology, Pakistan': 'Technology',
    'Health': 'Health', 'Pakistan, Health': 'Health', 'Sindh, Health': 'Health',
    'Jammu & Kashmir, Health': 'Health', 'Health, ADVICE': 'Health', 'Food, ADVICE, Health': 'Health',
    'Health, Life & Style': 'Health', 'Health, Latest': 'Health', 'Food': 'Food', 'Food, Spotlight': 'Food',
    'Technology, Food': 'Food',
    'World': 'World', 'Pakistan, World': 'World', 'World, Jammu & Kashmir': 'World', 'World, Sports': 'World',
    'World, Gilgit Baltistan': 'World', 'World, Life & Style': 'World', 'World, Music': 'World',
    'World, Fashion': 'World', 'World, Health': 'World', 'World, K-P': 'World', 'World, archives': 'World',
    'World, Videos': 'World', 'World, Opinion': 'World', 'World, Food, Technology': 'World', 'World, Bollywood': 'World',
    'World, Newslab': 'World', 'Pakistan, World, Islamabad': 'World', 'Pakistan, Islamabad, World, Cities': 'World',
    'Pakistan, World, Balochistan': 'World', 'Pakistan, Azad Jammu & Kashmir, World': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World',

    'Uncategorized': 'Other', 'SPONSORED': 'Other', 'Sponsored Content': 'Other',
    'NATIVE CONTENT': 'Other', 'Archives': 'Other', 'Slideshows, World': 'Other', 'Latest': 'Other',
    'Multimedia': 'Other', 'T.Edit': 'Other', 'NATIVE CONTENT & Pakistan Today': 'Other',
    'HEADLINES': 'Other', 'HEADLINES & Top Headlines': 'Other', 'FEATURED & top Featured': 'Other',
    'FEATURED & Top Non Business': 'Other', 'FEATURED': 'Other', 'E-papers & Profit Magazine': 'Other',
    'E-papers & Pakistan Today': 'Other', 'Top Headlines': 'Other', 'NATIONAL & Top Non Business': 'Other',
    'Editorials & Opinion': 'Opinion', 'Letters & Opinion': 'Opinion', 'NATIONAL & Top Headlines': 'Other',
    'NATIONAL & World': 'World', 'Top Non Business & World': 'World', 'LAHORE & NATIONAL': 'National',
    'Analysis & HEADLINES': 'Other', 'HEADLINES & NATIONAL': 'Other', 'CITY & LAHORE': 'National',
    'HEADLINES & NATIONAL & Top Headlines': 'Other', 'Top Headlines & World': 'World', 'KARACHI & NATIONAL': 'National',
    'NATIONAL & PESHAWAR': 'National', 'HEADLINES & KARACHI': 'Other', 'HEADLINES & LAHORE': 'Other',
    'HEADLINES & PESHAWAR': 'Other', 'INTERVIEW & Top Headlines': 'Other', 'ISLAMABAD & NATIONAL': 'National',
    'CITY & HEADLINES & LAHORE': 'Other', 'HEADLINES & Sports': 'Sports', 'KARACHI & LAHORE & NATIONAL': 'National',
    'KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Non Business': 'Other', 'FEATURED & Top Headlines': 'Other',
    'NATIONAL & Sports': 'Sports', 'KARACHI & NATIONAL & Top Headlines': 'National', 'CITY & ISLAMABAD': 'National',
    'HEADLINES & Top Headlines & World': 'World', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'HEADLINES & LAHORE & NATIONAL': 'Other',
    'Analysis & HEADLINES & NATIONAL & Top Headlines': 'Other',
    'Book Review': 'Other', 'LAHORE & NATIONAL & Top Headlines': 'National',
    'HEADLINES & LAHORE & NATIONAL & Top Headlines': 'Other', 'CITY & KARACHI & NATIONAL': 'National',
    'LAHORE & NATIONAL & PESHAWAR': 'National', 'Comment & Editorials': 'Opinion',
    'HEADLINES & Sports & Top Headlines': 'Sports', 'Book Review & E-papers & Pakistan Today': 'Other',
    'CITY & HEADLINES & NATIONAL': 'Other', 'CITY & NATIONAL': 'National', 'FEATURED & NATIONAL': 'Other',
    'ISLAMABAD & KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Headlines & World': 'World',
    'LAHORE & Top Headlines': 'Other', 'HEADLINES & ISLAMABAD & NATIONAL': 'Other',
    'HEADLINES & NATIONAL & Sports & Top Headlines': 'Sports', 'NATIONAL & Sports & Top Headlines': 'Sports',
    'Analysis & E-papers & Pakistan Today': 'Other', 'CITY & HEADLINES': 'Other', 'Sports & World': 'Sports',
    'HEADLINES & ISLAMABAD & LAHORE & NATIONAL': 'Other', 'CITY & FEATURED & LAHORE': 'Other',
    'NATIONAL & Opinion': 'Opinion', 'Entertainment & World': 'Entertainment',
    'HEADLINES & ISLAMABAD & NATIONAL & Pakistan Today': 'Other',
    'NATIONAL & Sports & Top Headlines & Top Non Business': 'Sports', 'HEADLINES & KARACHI & NATIONAL': 'Other',
    'OIC & World': 'World', 'Agriculture & NATIONAL': 'National', 'NATIONAL & top Featured': 'Other',
    'CITY & Education & MULTAN': 'National', 'Cartoon & Letters': 'Opinion', 'FEATURED & HEADLINES & NATIONAL': 'Other',
    'FEATURED & Sports': 'Sports', 'Comment & Letters': 'Opinion', 'ISLAMABAD & SPONSORED': 'Other',
    'top Featured & World': 'World', 'HEADLINES & NATIONAL & top Featured': 'Other',
    'NATIONAL & Top Headlines & World': 'World', 'NATIONAL & top Featured & World': 'World',
    'MULTAN & NATIONAL': 'National', 'NATIONAL & top Featured & Top Headlines': 'Other',
    'NATIONAL & PESHAWAR & Top Headlines': 'National', 'CITY & PESHAWAR': 'National',
    'HEADLINES & Letters & Opinion': 'Opinion', 'CITY & LAHORE & Opinion': 'Opinion',
    'ISLAMABAD & Pakistan Today': 'Other', 'top Featured & Top Headlines': 'Other', 'top Featured': 'Other',
    'PESHAWAR & Sports': 'Sports', 'Sports & top Featured': 'Sports', 'E-papers': 'Other',
    'CITY & ISLAMABAD & NATIONAL': 'National', 'Comment & NATIONAL & Opinion': 'Opinion',
    'Book Review & NATIONAL': 'Other', 'Entertainment & NATIONAL': 'Entertainment', 'Analysis & NATIONAL': 'Other',
    'NATIONAL & Pakistan Today': 'Other', 'Cartoon & Editorials': 'Opinion', 'CITY & LAHORE & NATIONAL': 'National',
    'Education & NATIONAL': 'National', 'NATIVE CONTENT & Pakistan Today': 'Other',

    'Pakistan, Punjab': 'National', 'Pakistan, K-P': 'National', 'Life & Style, Film, Gossip': 'Entertainment',
    'Life & Style, TV': 'Entertainment', 'Sindh': 'National', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Gossip': 'Entertainment', 'Life & Style, Music': 'Entertainment', 'Punjab': 'National', 'K-P': 'National',
    'Editorial': 'Opinion', 'Balochistan': 'National', 'Sindh, Health': 'Health', 'Pakistan, Life & Style': 'Entertainment',
    'Jammu & Kashmir, Health': 'Health', 'Pakistan, Sindh': 'National', 'K-P, Music': 'Entertainment',
    'Life & Style': 'Entertainment', 'Pakistan, Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Jammu & Kashmir': 'National',
    'Life & Style, Art and Books, Music': 'Entertainment', 'Life & Style, Fashion, Gossip': 'Entertainment',
    'Life & Style, Music, Gossip': 'Entertainment', 'Pakistan, Jammu & Kashmir': 'National', 'Sindh, Punjab': 'National',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Film, TV': 'Entertainment', 'Balochistan, Business': 'Business',
    'Life & Style, Health': 'Health', 'Punjab, Business': 'Business', 'Music, Film': 'Entertainment', 'TV': 'Entertainment',
    'Life & Style, Music, Food': 'Entertainment', 'Pakistan, Health': 'Health', 'Balochistan, K-P': 'National',
    'Sindh, Technology': 'Technology', 'Film': 'Entertainment', 'Sindh, Life & Style, Music': 'Entertainment',
    'Life & Style, Gossip, TV': 'Entertainment', 'Life & Style, Art and Books': 'Entertainment', 'K-P, Technology': 'Technology',
    'Magazine': 'Other', 'Film, Gossip': 'Entertainment', 'Life & Style, Theatre': 'Entertainment', 'Business, Technology': 'Business',
    'Balochistan, Gilgit Baltistan': 'National', 'K-P, Health': 'Health', 'Pakistan, Gilgit Baltistan': 'National',
    'Life & Style, Film, Fashion': 'Entertainment', 'Fashion': 'Entertainment', 'Punjab, World': 'World',
    'Pakistan, Sports': 'Sports', 'Pakistan, Technology': 'Technology', 'Balochistan, Health': 'Health',
    'Pakistan, Sindh, Art and Books': 'Entertainment', 'Life & Style, Fashion, TV': 'Entertainment', 'TV, Theatre': 'Entertainment',
    'Life & Style, Food': 'Food', 'Pakistan, Film': 'Entertainment', 'Health': 'Health', 'World, K-P': 'World',
    'Life & Style, K-P': 'Entertainment', 'Art and Books': 'Entertainment', 'Opinion, Health': 'Opinion',
    'Life & Style, Music, TV': 'Entertainment', 'Sports, Life & Style': 'Sports', 'Sindh, Jammu & Kashmir': 'National',
    'Sindh, Business': 'Business', 'Life & Style, Film, Theatre': 'Entertainment', 'Sindh, Sports': 'Sports',
    'archives': 'Other', 'Music, Health': 'Health', 'Punjab, Technology': 'Technology', 'Punjab, Health': 'Health',
    'Life & Style, Food, Gossip': 'Entertainment', 'Life & Style, Food, Health': 'Health', 'Sports, K-P': 'Sports',
    'Sindh, Life & Style': 'Entertainment', 'Life & Style, Music, Theatre': 'Entertainment', 'Sports, Videos': 'Sports',
    'Opinion, Technology': 'Opinion', 'Sindh, World': 'World', 'Pakistan, K-P, Health': 'Health',
    'Pakistan, Sindh, Punjab': 'National', 'Life & Style, Opinion': 'Opinion', 'Balochistan, Life & Style': 'Entertainment',
    'Life & Style, Art and Books, Film': 'Entertainment', 'Pakistan, Opinion': 'Opinion', 'Punjab, Jammu & Kashmir': 'National',
    'Punjab, Sports': 'Sports', 'Life & Style, Technology': 'Technology', 'Gossip': 'Entertainment',
    'Life & Style, Music, Film': 'Entertainment', 'Life & Style, Health, TV': 'Health', 'Technology, Games': 'Technology',
    'Pakistan, World, Jammu & Kashmir': 'World', 'Life & Style, Music, Fashion': 'Entertainment',
    'Life & Style, Art and Books, Health': 'Health', 'Sindh, Videos': 'Other', 'Punjab, Food': 'Food',
    'Life & Style, Film, Health': 'Health', 'Sports, Multan, Cities': 'Sports', 'Music, Fashion': 'Entertainment',
    'Videos': 'Other', 'K-P, Art and Books': 'Entertainment', 'Music, Gossip': 'Entertainment',
    'Business, Jammu & Kashmir': 'Business', 'Sindh, Balochistan': 'National', 'Opinion, Editorial': 'Opinion',
    'Pakistan, archives': 'Other', 'Jammu & Kashmir, Gilgit Baltistan': 'National', 'Punjab, K-P': 'National',
    'Business, K-P': 'Business', 'Life & Style, Fashion, Health': 'Health', 'World, Azad Jammu & Kashmir': 'World',
    'Life &amp; Style, TV': 'Entertainment', 'Sindh, Business, Health': 'Business', 'Sports, Business': 'Sports',
    'Punjab, Business, Lahore, Cities': 'Business', 'Punjab, Life & Style, Gossip': 'Entertainment',
    'Life & Style, Art and Books, TV': 'Entertainment', 'Sindh, Karachi, Cities': 'National', 'Sindh, archives': 'Other',
    'Pakistan, Islamabad': 'National', 'life and style': 'Entertainment', 'life and style, Music': 'Entertainment',
    'Islamabad': 'National', 'Pakistan, Health, Food': 'Health', 'Pakistan, Khyber-Pakhtunkhwa': 'National',
    'Music, Film, Theatre': 'Entertainment', 'Music': 'Entertainment', 'Cricket': 'Sports',
    'Film, Gossip, Bollywood': 'Entertainment', 'Khyber-Pakhtunkhwa': 'National', 'Sindh, Karachi': 'National',
    'Khyber-Pakhtunkhwa, Pakistan': 'National', 'World, Azad Jammu & Kashmir, Cities': 'World',
    'Pakistan, Islamabad, Cities': 'National', 'Gossip, TV, Music': 'Entertainment',
    'Music, Pakistan, Life & Style': 'Entertainment', 'Bollywood, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir': 'National', 'Art and Books, Film, Games': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Cities': 'National', 'Sindh, Hyderabad': 'National', 'Sindh, Cities': 'National',
    'Pakistan, Sindh, Cities': 'National', 'Art and Books, Film': 'Entertainment', 'Pakistan, Lahore': 'National',
    'Sports, TV, Gossip': 'Sports', 'Punjab, Pakistan, Lahore': 'National', 'Games': 'Other',
    'Khyber-Pakhtunkhwa, Swat': 'National', 'Pakistan, Sindh, Karachi, Business': 'Business',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National', 'Football': 'Sports', 'Pakistan, Peshawar': 'National',
    'TV, Sports': 'Sports', 'Khyber-Pakhtunkhwa, Cities, Peshawar': 'National', 'Rawalpindi': 'National',
    'Slideshows, World': 'World', 'Pakistan, Azad Jammu & Kashmir, Cities': 'National',
    'Pakistan, Cities, Khyber Pakhtunkhwa': 'National', 'Gossip, Film': 'Entertainment', 'Health, ADVICE': 'Health',
    'TV, Film': 'Entertainment', 'Pakistan, Lahore, Cities': 'National', 'Pakistan, Gwadar, Cities': 'National',
    'Rawalpindi, Punjab': 'National', 'Punjab, Rawalpindi': 'National', 'Khyber-Pakhtunkhwa, Peshawar': 'National',
    'Punjab, Lahore': 'National', 'Pakistan, Balochistan, Cities': 'National', 'Sindh, khairpur': 'National',
    'Islamabad, Pakistan, Cities': 'National', 'Pakistan, Khyber-Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Karachi': 'National', 'Khyber-Pakhtunkhwa, Abbottabad': 'National', 'TV, Gossip': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Gilgit-Baltistan': 'National', 'World, Pakistan': 'World',
    'Khyber-Pakhtunkhwa, Cities': 'National', 'Fashion, Gossip': 'Entertainment', 'Islamabad, World': 'World',
    'Punjab, Multan': 'National', 'Punjab, Faisalabad': 'National', 'Pakistan, Islamabad, Sindh': 'National',
    'Pakistan, Karachi, Cities': 'National', 'Islamabad, Business': 'Business', 'Sindh, Pakistan': 'National',
    'Pakistan, Islamabad, Punjab': 'National', 'Pakistan, Sindh, Karachi, Cities': 'National',
    'Pakistan, Cities, Azad Jammu & Kashmir': 'National', 'Health, Life & Style, ADVICE': 'Health',
    'Islamabad, Balochistan': 'National', 'Pakistan, Gilgit-Baltistan': 'National', 'Pakistan, Punjab, Cities': 'National',
    'Sindh, tharparkar': 'National', 'Business, Gilgit-Baltistan': 'Business', 'Khyber Pakhtunkhwa': 'National',
    'Film, TV, Life & Style, life and style': 'Entertainment', 'TV, Film, Life & Style': 'Entertainment',
    'Film, Life & Style': 'Entertainment', 'Pakistan, Cities, Lahore': 'National',
    'Life & Style, Gossip, Film': 'Entertainment', 'Film, Art and Books': 'Entertainment', 'Food, ADVICE, Health': 'Health',
    'Gossip, Fashion': 'Entertainment', 'Gossip, Life & Style': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Mardan': 'National', 'Pakistan, Punjab, Lahore': 'National',
    'Life & Style, Gossip, Fashion': 'Entertainment', 'Pakistan, Cities': 'National',
    'Islamabad, Rawalpindi': 'National', 'Islamabad, Pakistan': 'National', 'Pakistan, Islamabad, World': 'World',
    'Pakistan, Sindh, Nawabshah': 'National', 'Balochistan, Islamabad, Pakistan': 'National', 'Fashion, Life & Style': 'Entertainment',
    'Health, ADVICE, Life & Style': 'Health', 'Music, Life & Style': 'Entertainment',
    'Pakistan, Peshawar, Islamabad, Cities': 'National', 'Punjab, Gilgit-Baltistan': 'National', 'Health, World': 'Health',
    'Gossip, Life & Style, Music': 'Entertainment', 'Gossip, Music': 'Entertainment', 'Gossip, Film, TV': 'Entertainment',
    'Technology, Life & Style': 'Technology', 'Sindh, Pakistan, Karachi, Cities': 'National',
    'Gossip, TV, Film, Life & Style': 'Entertainment', 'Pakistan, World, Islamabad, Cities': 'World',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'Sindh, Nawabshah': 'National', 'Azad Jammu & Kashmir, Pakistan': 'National',
    'TV, Gossip, Life & Style': 'Entertainment', 'Film, TV, Life & Style': 'Entertainment',
    'Khyber Pakhtunkhwa, Peshawar, Sports': 'Sports', 'Film, Gossip, Life & Style': 'Entertainment',
    'Rawalpindi, Islamabad': 'National', 'Sindh, sukkur': 'National', 'Pakistan, World, Islamabad': 'World',
    'Pakistan, Sindh, Karachi, Islamabad': 'National', 'Business, Pakistan': 'Business',
    'ADVICE, Life & Style, Film': 'Entertainment', 'Film, Life & Style, TV, Gossip': 'Entertainment', 'Music, TV': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Islamabad': 'National', 'Music, Gossip, Life & Style': 'Entertainment',
    'Islamabad, Cities, Pakistan': 'National', 'Islamabad, Punjab': 'National', 'Health, Life & Style': 'Health',
    'Technology, World': 'Technology', 'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Hyderabad': 'National', 'Life & Style, TV, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'ADVICE, Life & Style': 'Health', 'Pakistan, Sindh, Balochistan': 'National',
    'Gossip, Life & Style, TV': 'Entertainment', 'Music, Film, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Islamabad': 'National', 'Pakistan, World, Azad Jammu & Kashmir': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World', 'Rawalpindi, Pakistan, Business': 'Business',
    'Khyber Pakhtunkhwa, Peshawar': 'National', 'Sindh, dadu': 'National', 'Fashion, Gossip, Life & Style': 'Entertainment',
    'Pakistan, Karachi, Sindh': 'National', 'Pakistan, Balochistan, Quetta': 'National', 'TV, Life & Style': 'Entertainment',
    'Pakistan, Islamabad, World, Cities': 'World', 'Pakistan, Khyber Pakhtunkhwa, Cities': 'National',
    'Pakistan, Karachi, Sindh, Cities': 'National', 'Balochistan, Quetta': 'National', 'Pakistan, Rawalpindi': 'National',
    'Punjab, Film': 'Entertainment', 'Khyber Pakhtunkhwa, Swat': 'National', 'Balochistan, Gwadar': 'National',
    'TV, Life & Style, Gossip': 'Entertainment', 'Sindh, Karachi, Pakistan, Cities': 'National',
    'Pakistan, Sindh, Islamabad': 'National', 'Pakistan, Punjab, Islamabad, Cities': 'National',
    'Khyber Pakhtunkhwa, Nowshera': 'National', 'TV, Film, Gossip': 'Entertainment', 'Abbottabad': 'National',
    'Pakistan, Karachi, Peshawar': 'National', 'Pakistan, Khyber Pakhtunkhwa': 'National', 'Sports, Hockey': 'Sports',
    'Art and Books, Life & Style': 'Entertainment', 'Pakistan, World, Balochistan': 'World', 'Theatre': 'Entertainment',
    'Gossip, Film, Life & Style': 'Entertainment', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business',
    'Film, TV, Gossip, Life & Style': 'Entertainment', 'Life & Style, Sports': 'Sports',
    'Music, Art and Books, Life & Style, Film': 'Entertainment', 'Pakistan, Sindh, Karachi, Hyderabad, Cities': 'National',
    'Life & Style, ADVICE': 'Health',
    'Pakistan, Sindh, Punjab, Azad Jammu & Kashmir, Gilgit-Baltistan, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Karachi': 'National', 'Fashion, Music': 'Entertainment', 'Film, TV': 'Entertainment',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra, Nowshera, Charsadda': 'National', 'Fashion, Gossip, TV': 'Entertainment',
    'Karachi, Pakistan': 'National', 'Pakistan, Azad Jammu & Kashmir, Muzaffarabad': 'National',
    'Pakistan, Azad Jammu & Kashmir, World': 'World', 'Film, Music, Gossip': 'Entertainment',
    'Pakistan, Punjab, Rawalpindi': 'National', 'Pakistan, Lahore, Punjab': 'National', 'World, Newslab': 'World',
    'Karachi': 'National', 'Pakistan, Islamabad, Azad Jammu & Kashmir': 'National', 'Gossip, Fashion, Life & Style': 'Entertainment',
    'Pakistan, Rawalpindi, Karachi': 'National', 'Art and Books, Gossip, Life & Style': 'Entertainment',
    'Film, Theatre, Life & Style': 'Entertainment', 'Pakistan, Punjab, Islamabad': 'National',
    'Gilgit-Baltistan, Pakistan': 'National',
    'World, Business': 'Business',
    'Gilgit-Baltistan': 'National',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
    'pakistan, punjab, khyber-pakhtunkhwa': 'National',
    'pakistan, cities, khyber-pakhtunkhwa': 'National',
    'khyber-pakhtunkhwa, mardan': 'National',
    'khyber-pakhtunkhwa, peshawar, sports': 'Sports',
    'islamabad, khyber-pakhtunkhwa': 'National',
    'ù¾ø§ú©ø³øªø§ù\x86': 'National',
    'khyber pakhtunkhwa, pakistan, swat': 'National'
}

# Case-insensitive lookup table: every key of category_mapping lowercased.
# Keys that differ only by case collapse into a single entry, and the one that
# appears later in category_mapping wins.
lowercase_category_mapping = {k.lower(): v for k, v in category_mapping.items()}

for name, df in dataframes.items():
    # A frame without a 'categories' column has nothing to map; skip it
    # explicitly rather than letting the lookup below raise KeyError.
    if 'categories' not in df.columns:
        print(f"Skipping {name}: no 'categories' column to map")
        continue

    # Lowercase the raw values (in place) so they line up with the lowercased keys.
    df['categories'] = df['categories'].astype(str).str.lower()

    # Apply the lowercase mapping; values with no matching key become NaN.
    df['mapped_categories'] = df['categories'].map(lowercase_category_mapping)

# Report the distinct broad labels per frame and how many rows stayed unmapped.
for name, df in dataframes.items():
    if 'mapped_categories' not in df.columns:
        # Frame was skipped above, so there is nothing to report.
        continue
    unique_mapped_categories = df['mapped_categories'].unique()
    print(f"\nUnique mapped categories in {name}:")
    print(unique_mapped_categories)
    nan_count = df['mapped_categories'].isnull().sum()
    print(f"Number of NaN values in 'mapped_categories' in {name}: {nan_count}")


Unique mapped categories in tribune(full-data):
['National' 'World' 'Sports' 'Business' 'Entertainment' 'Food'
 'Technology' 'Opinion' 'Health' 'Other' 'Politics']
Number of NaN values in 'mapped_categories' in tribune(full-data): 0


In [None]:
# Keep only the rows whose broad category is 'National' or 'World' and save
# the result to Google Drive.
filtered_dataframes = {}

output_file_path = "/content/drive/MyDrive/project dataset/tribune.csv"

# Used to derive one file per frame when more than one frame is loaded.
output_stem, output_ext = os.path.splitext(output_file_path)

for name, df in dataframes.items():
    # Frames that never received a 'mapped_categories' column cannot be filtered.
    if 'mapped_categories' not in df.columns:
        print(f"Skipping {name}: no 'mapped_categories' column")
        continue

    # Filter for 'National' or 'World'
    filtered_df = df[df['mapped_categories'].isin(['National', 'World'])]

    filtered_dataframes[name + '_filtered'] = filtered_df

    # With a single frame the configured path is used unchanged. With several
    # frames each one gets its own file, so a later frame cannot silently
    # overwrite the export of an earlier one.
    if len(dataframes) == 1:
        target_path = output_file_path
    else:
        target_path = f"{output_stem}_{name}{output_ext}"

    # Save directly to Google Drive
    filtered_df.to_csv(target_path, index=False)

    print(f"Filtered data saved to Drive at: {target_path}")
    print(f"\nFiltered DataFrame: {name}_filtered")
    display(filtered_df.head())
    print(f"Shape of filtered DataFrame: {filtered_df.shape}")


## **Dawn**

In [17]:
# Pick the Dawn export by file name. `files` comes from os.listdir, which
# returns entries in arbitrary order, so a positional slice is not guaranteed
# to land on the same file from one run (or one machine) to the next.
selected_files5 = [f for f in files if f.lower().startswith("dawn")]
print("Selected files:", selected_files5)

# Start from an empty registry so only the frames loaded here are processed
# by the cells that follow.
dataframes = {}

for file in selected_files5:
    file_path = os.path.join(data_path, file)
    # Frame key is the file name without its extension.
    df_name = os.path.splitext(file)[0]
    try:
        # load CSV
        df = pd.read_csv(file_path, encoding="latin-1", index_col=False)

        # remove unnamed columns
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

        # store cleaned dataframe
        dataframes[df_name] = df
        print(f"Loaded {file} (cleaned unnamed columns)")
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining files.
        print(f"Error loading {file}: {e}")

# display summary
for name, df in dataframes.items():
    print(f"\nDataFrame: {name}")
    display(df.head())
    print(f"Columns: {df.columns.tolist()}")


Selected files: ['dawn (full-data).csv']
Loaded dawn (full-data).csv (cleaned unnamed columns)

DataFrame: dawn (full-data)


  df = pd.read_csv(file_path, encoding="latin-1", index_col=False)


Unnamed: 0,headline,date,link,source,categories,description
0,Chinese national held for beating traffic poli...,01/01/2020,https://www.dawn.com/news/1525657/chinese-nati...,Dawn,Pakistan,A Chinese national was arrested and booked on ...
1,Iraqi paramilitaries call for withdrawal from ...,01/01/2020,https://www.dawn.com/news/1525656/iraqi-parami...,Dawn,World,Paramilitary groups who have been protesting a...
2,Sarmad Khoosat reveals why Zindagi Tamasha's t...,01/01/2020,https://www.dawn.com/news/1525655/sarmad-khoos...,Dawn,Pakistan,"Sarmad Khoosatâs Zindagi Tamasha, has alread..."
3,PSL 2020 set to begin on February 20,01/01/2020,https://www.dawn.com/news/1525653/psl-2020-set...,Dawn,Sport,The schedule for the much anticipated fifth ed...
4,"Despite extreme differences, Centre willing to...",01/01/2020,https://www.dawn.com/news/1525650/despite-extr...,Dawn,Pakistan,"Federal Minister for Planning, Development & R..."


Columns: ['headline', 'date', 'link', 'source', 'categories', 'description']


In [18]:
# Collect every distinct comma-separated token that appears in any frame's
# 'categories' column, then print them in sorted order.
all_unique_categories = set()

for df_name, df in dataframes.items():
    if 'categories' not in df.columns:
        continue

    # Lowercase the column in place; later cells see the lowercased values.
    df['categories'] = df['categories'].astype(str).str.lower()

    distinct_values = df['categories'].dropna().unique()
    # Split each raw value on commas and trim the surrounding whitespace.
    tokens = (
        piece.strip()
        for raw_value in distinct_values
        for piece in raw_value.split(',')
    )
    # Drop empty tokens and the 'nan' string that astype(str) yields for missing values.
    all_unique_categories.update(
        token for token in tokens if token and token != 'nan'
    )

print("Unique Categories:")
for category in sorted(all_unique_categories):
    print(f"- {category}")

Unique Categories:
- and mostly behind the scenes. provincial governments collapse frequently. there is a stalemate
- business
- categories
- is elect a new prime minister. until they do so
- mr ahmed believes âit isnât possible to revamp the entire infrastructure. look at the enormity of the task. even $10bn wouldnât suffice. only the gaps can be bridged.âthe sc-mandated commission on water and sanitation in sindh made 100 recommendations to improve service delivery in these sectors. according to shahab usto
- pakistan
- president alvi dissolved the na under article 58 of the constitution.later in the evening
- prism
- seen as another act of transgressing institutional boundaries
- similarly
- sport
- world
- âhow ignorant art thou in thy pride of wisdom!âthe writer is a lawyer and a former president of the sindh high court bar association. he tweets @salahmedpkpublished in dawn
- âokay
- â jinnah would say


In [19]:
# Broad labels that the values of category_mapping are drawn from.
broad_categories = [
    'Business',
    'Politics',
    'National',
    'Sports',
    'Opinion',
    'Entertainment',
    'Technology',
    'Health',
    'Food',
    'World',
    'Other',
]

category_mapping = {
    'Business': 'Business', 'Pakistan, Business': 'Business', 'Business, World': 'Business',
    'Balochistan, Business': 'Business', 'Punjab, Business': 'Business', 'Business, Technology': 'Business',
    'Business, K-P': 'Business', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business', 'Pakistan, Business, Islamabad': 'Business',
    'GOVERNANCE': 'Politics', 'GOVERNANCE & HEADLINES': 'Politics',
    'GOVERNANCE & HEADLINES & Top Headlines': 'Politics', 'GOVERNANCE & NATIONAL': 'Politics',
    'GOVERNANCE & HEADLINES & NATIONAL': 'Politics', 'POLITICS': 'Politics',
    'POLITICS, Pakistan': 'Politics', 'POLITICS, World': 'Politics', 'Pakistan, POLITICS': 'Politics',
    'NATIONAL': 'National', 'ISLAMABAD': 'National', 'KARACHI': 'National', 'LAHORE': 'National',
    'PESHAWAR': 'National', 'MULTAN': 'National', 'CITY': 'National', 'Rawalpindi': 'National',
    'Pakistan': 'National', 'Punjab': 'National', 'Sindh': 'National', 'K-P': 'National',
    'Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Azad Jammu & Kashmir': 'National',
    'Sports': 'Sports', 'Sports & Top Headlines': 'Sports', 'Sports & World': 'Sports',
    'Sports, K-P': 'Sports', 'Sports, Pakistan': 'Sports', 'Sports, TV': 'Sports',
    'Sports, Hockey': 'Sports', 'Sports, Cricket': 'Sports', 'Football': 'Sports', 'Tennis': 'Sports',
    'Sports, Punjab': 'Sports', 'Sports, Life & Style': 'Sports', 'Sports, Multan, Cities': 'Sports',
    'Editorials': 'Opinion', 'Editorials & Letters & Opinion': 'Opinion', 'Comment': 'Opinion',
    'Comment & Opinion': 'Opinion', 'Letters': 'Opinion', 'Opinion': 'Opinion', 'Cartoon & Opinion': 'Opinion',
    'Comment & HEADLINES & Opinion': 'Opinion', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'Entertainment': 'Entertainment', 'Life & Style': 'Entertainment', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Music': 'Entertainment', 'Life & Style, TV': 'Entertainment', 'Life & Style, Gossip': 'Entertainment',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Food': 'Entertainment',
    'Life & Style, Art and Books': 'Entertainment', 'Life & Style, Theatre': 'Entertainment',
    'Life & Style, Spotlight': 'Entertainment', 'Life & Style, Bollywood': 'Entertainment',
    'Life & Style, K-P': 'Entertainment', 'Life & Style, Health': 'Entertainment',
    'Technology': 'Technology', 'World, Technology': 'Technology', 'Technology, Business': 'Technology',
    'Technology, Sports': 'Technology', 'Technology, Food': 'Technology', 'Technology, Games': 'Technology',
    'Technology, Life & Style': 'Technology', 'Technology, Pakistan': 'Technology',
    'Health': 'Health', 'Pakistan, Health': 'Health', 'Sindh, Health': 'Health',
    'Jammu & Kashmir, Health': 'Health', 'Health, ADVICE': 'Health', 'Food, ADVICE, Health': 'Health',
    'Health, Life & Style': 'Health', 'Health, Latest': 'Health', 'Food': 'Food', 'Food, Spotlight': 'Food',
    'Technology, Food': 'Food',
    'World': 'World', 'Pakistan, World': 'World', 'World, Jammu & Kashmir': 'World', 'World, Sports': 'World',
    'World, Gilgit Baltistan': 'World', 'World, Life & Style': 'World', 'World, Music': 'World',
    'World, Fashion': 'World', 'World, Health': 'World', 'World, K-P': 'World', 'World, archives': 'World',
    'World, Videos': 'World', 'World, Opinion': 'Opinion', 'World, Food, Technology': 'World', 'World, Bollywood': 'World',
    'World, Newslab': 'World', 'Pakistan, World, Islamabad': 'World', 'Pakistan, Islamabad, World, Cities': 'World',
    'Pakistan, World, Balochistan': 'World', 'Pakistan, Azad Jammu & Kashmir, World': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World',

    'Uncategorized': 'Other', 'SPONSORED': 'Other', 'Sponsored Content': 'Other',
    'NATIVE CONTENT': 'Other', 'Archives': 'Other', 'Slideshows, World': 'Other', 'Latest': 'Other',
    'Multimedia': 'Other', 'T.Edit': 'Other', 'NATIVE CONTENT & Pakistan Today': 'Other',
    'HEADLINES': 'Other', 'HEADLINES & Top Headlines': 'Other', 'FEATURED & top Featured': 'Other',
    'FEATURED & Top Non Business': 'Other', 'FEATURED': 'Other', 'E-papers & Profit Magazine': 'Other',
    'E-papers & Pakistan Today': 'Other', 'Top Headlines': 'Other', 'NATIONAL & Top Non Business': 'Other',
    'Editorials & Opinion': 'Opinion', 'Letters & Opinion': 'Opinion', 'NATIONAL & Top Headlines': 'Other',
    'NATIONAL & World': 'World', 'Top Non Business & World': 'World', 'LAHORE & NATIONAL': 'National',
    'Analysis & HEADLINES': 'Other', 'HEADLINES & NATIONAL': 'Other', 'CITY & LAHORE': 'National',
    'HEADLINES & NATIONAL & Top Headlines': 'Other', 'Top Headlines & World': 'World', 'KARACHI & NATIONAL': 'National',
    'NATIONAL & PESHAWAR': 'National', 'HEADLINES & KARACHI': 'Other', 'HEADLINES & LAHORE': 'Other',
    'HEADLINES & PESHAWAR': 'Other', 'INTERVIEW & Top Headlines': 'Other', 'ISLAMABAD & NATIONAL': 'National',
    'CITY & HEADLINES & LAHORE': 'Other', 'HEADLINES & Sports': 'Sports', 'KARACHI & LAHORE & NATIONAL': 'National',
    'KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Non Business': 'Other', 'FEATURED & Top Headlines': 'Other',
    'NATIONAL & Sports': 'Sports', 'KARACHI & NATIONAL & Top Headlines': 'National', 'CITY & ISLAMABAD': 'National',
    'HEADLINES & Top Headlines & World': 'World', 'Editorials & HEADLINES & Opinion': 'Opinion',
    'HEADLINES & LAHORE & NATIONAL': 'Other',
    'Analysis & HEADLINES & NATIONAL & Top Headlines': 'Other',
    'Book Review': 'Other', 'LAHORE & NATIONAL & Top Headlines': 'National',
    'HEADLINES & LAHORE & NATIONAL & Top Headlines': 'Other', 'CITY & KARACHI & NATIONAL': 'National',
    'LAHORE & NATIONAL & PESHAWAR': 'National', 'Comment & Editorials': 'Opinion',
    'HEADLINES & Sports & Top Headlines': 'Sports', 'Book Review & E-papers & Pakistan Today': 'Other',
    'CITY & HEADLINES & NATIONAL': 'Other', 'CITY & NATIONAL': 'National', 'FEATURED & NATIONAL': 'Other',
    'ISLAMABAD & KARACHI & LAHORE': 'National', 'HEADLINES & NATIONAL & Top Headlines & World': 'World',
    'LAHORE & Top Headlines': 'Other', 'HEADLINES & ISLAMABAD & NATIONAL': 'Other',
    'HEADLINES & NATIONAL & Sports & Top Headlines': 'Sports', 'NATIONAL & Sports & Top Headlines': 'Sports',
    'Analysis & E-papers & Pakistan Today': 'Other', 'CITY & HEADLINES': 'Other', 'Sports & World': 'Sports',
    'HEADLINES & ISLAMABAD & LAHORE & NATIONAL': 'Other', 'CITY & FEATURED & LAHORE': 'Other',
    'NATIONAL & Opinion': 'Opinion', 'Entertainment & World': 'Entertainment',
    'HEADLINES & ISLAMABAD & NATIONAL & Pakistan Today': 'Other',
    'NATIONAL & Sports & Top Headlines & Top Non Business': 'Sports', 'HEADLINES & KARACHI & NATIONAL': 'Other',
    'OIC & World': 'World', 'Agriculture & NATIONAL': 'National', 'NATIONAL & top Featured': 'Other',
    'CITY & Education & MULTAN': 'National', 'Cartoon & Letters': 'Opinion', 'FEATURED & HEADLINES & NATIONAL': 'Other',
    'FEATURED & Sports': 'Sports', 'Comment & Letters': 'Opinion', 'ISLAMABAD & SPONSORED': 'Other',
    'top Featured & World': 'World', 'HEADLINES & NATIONAL & top Featured': 'Other',
    'NATIONAL & Top Headlines & World': 'World', 'NATIONAL & top Featured & World': 'World',
    'MULTAN & NATIONAL': 'National', 'NATIONAL & top Featured & Top Headlines': 'Other',
    'NATIONAL & PESHAWAR & Top Headlines': 'National', 'CITY & PESHAWAR': 'National',
    'HEADLINES & Letters & Opinion': 'Opinion', 'CITY & LAHORE & Opinion': 'Opinion',
    'ISLAMABAD & Pakistan Today': 'Other', 'top Featured & Top Headlines': 'Other', 'top Featured': 'Other',
    'PESHAWAR & Sports': 'Sports', 'Sports & top Featured': 'Sports', 'E-papers': 'Other',
    'CITY & ISLAMABAD & NATIONAL': 'National', 'Comment & NATIONAL & Opinion': 'Opinion',
    'Book Review & NATIONAL': 'Other', 'Entertainment & NATIONAL': 'Entertainment', 'Analysis & NATIONAL': 'Other',
    'NATIONAL & Pakistan Today': 'Other', 'Cartoon & Editorials': 'Opinion', 'CITY & LAHORE & NATIONAL': 'National',
    'Education & NATIONAL': 'National', 'NATIVE CONTENT & Pakistan Today': 'Other',

    'Pakistan, Punjab': 'National', 'Pakistan, K-P': 'National', 'Life & Style, Film, Gossip': 'Entertainment',
    'Life & Style, TV': 'Entertainment', 'Sindh': 'National', 'Life & Style, Film': 'Entertainment',
    'Life & Style, Gossip': 'Entertainment', 'Life & Style, Music': 'Entertainment', 'Punjab': 'National', 'K-P': 'National',
    'Editorial': 'Opinion', 'Balochistan': 'National', 'Sindh, Health': 'Health', 'Pakistan, Life & Style': 'Entertainment',
    'Jammu & Kashmir, Health': 'Health', 'Pakistan, Sindh': 'National', 'K-P, Music': 'Entertainment',
    'Life & Style': 'Entertainment', 'Pakistan, Balochistan': 'National', 'Gilgit Baltistan': 'National', 'Jammu & Kashmir': 'National',
    'Life & Style, Art and Books, Music': 'Entertainment', 'Life & Style, Fashion, Gossip': 'Entertainment',
    'Life & Style, Music, Gossip': 'Entertainment', 'Pakistan, Jammu & Kashmir': 'National', 'Sindh, Punjab': 'National',
    'Life & Style, Fashion': 'Entertainment', 'Life & Style, Film, TV': 'Entertainment', 'Balochistan, Business': 'Business',
    'Life & Style, Health': 'Health', 'Punjab, Business': 'Business', 'Music, Film': 'Entertainment', 'TV': 'Entertainment',
    'Life & Style, Music, Food': 'Entertainment', 'Pakistan, Health': 'Health', 'Balochistan, K-P': 'National',
    'Sindh, Technology': 'Technology', 'Film': 'Entertainment', 'Sindh, Life & Style, Music': 'Entertainment',
    'Life & Style, Gossip, TV': 'Entertainment', 'Life & Style, Art and Books': 'Entertainment', 'K-P, Technology': 'Technology',
    'Magazine': 'Other', 'Film, Gossip': 'Entertainment', 'Life & Style, Theatre': 'Entertainment', 'Business, Technology': 'Business',
    'Balochistan, Gilgit Baltistan': 'National', 'K-P, Health': 'Health', 'Pakistan, Gilgit Baltistan': 'National',
    'Life & Style, Film, Fashion': 'Entertainment', 'Fashion': 'Entertainment', 'Punjab, World': 'World',
    'Pakistan, Sports': 'Sports', 'Pakistan, Technology': 'Technology', 'Balochistan, Health': 'Health',
    'Pakistan, Sindh, Art and Books': 'Entertainment', 'Life & Style, Fashion, TV': 'Entertainment', 'TV, Theatre': 'Entertainment',
    'Life & Style, Food': 'Food', 'Pakistan, Film': 'Entertainment', 'Health': 'Health', 'World, K-P': 'World',
    'Life & Style, K-P': 'Entertainment', 'Art and Books': 'Entertainment', 'Opinion, Health': 'Opinion',
    'Life & Style, Music, TV': 'Entertainment', 'Sports, Life & Style': 'Sports', 'Sindh, Jammu & Kashmir': 'National',
    'Sindh, Business': 'Business', 'Life & Style, Film, Theatre': 'Entertainment', 'Sindh, Sports': 'Sports',
    'archives': 'Other', 'Music, Health': 'Health', 'Punjab, Technology': 'Technology', 'Punjab, Health': 'Health',
    'Life & Style, Food, Gossip': 'Entertainment', 'Life & Style, Food, Health': 'Health', 'Sports, K-P': 'Sports',
    'Sindh, Life & Style': 'Entertainment', 'Life & Style, Music, Theatre': 'Entertainment', 'Sports, Videos': 'Sports',
    'Opinion, Technology': 'Opinion', 'Sindh, World': 'World', 'Pakistan, K-P, Health': 'Health',
    'Pakistan, Sindh, Punjab': 'National', 'Life & Style, Opinion': 'Opinion', 'Balochistan, Life & Style': 'Entertainment',
    'Life & Style, Art and Books, Film': 'Entertainment', 'Pakistan, Opinion': 'Opinion', 'Punjab, Jammu & Kashmir': 'National',
    'Punjab, Sports': 'Sports', 'Life & Style, Technology': 'Technology', 'Gossip': 'Entertainment',
    'Life & Style, Music, Film': 'Entertainment', 'Life & Style, Health, TV': 'Health', 'Technology, Games': 'Technology',
    'Pakistan, World, Jammu & Kashmir': 'World', 'Life & Style, Music, Fashion': 'Entertainment',
    'Life & Style, Art and Books, Health': 'Health', 'Sindh, Videos': 'Other', 'Punjab, Food': 'Food',
    'Life & Style, Film, Health': 'Health', 'Sports, Multan, Cities': 'Sports', 'Music, Fashion': 'Entertainment',
    'Videos': 'Other', 'K-P, Art and Books': 'Entertainment', 'Music, Gossip': 'Entertainment',
    'Business, Jammu & Kashmir': 'Business', 'Sindh, Balochistan': 'National', 'Opinion, Editorial': 'Opinion',
    'Pakistan, archives': 'Other', 'Jammu & Kashmir, Gilgit Baltistan': 'National', 'Punjab, K-P': 'National',
    'Business, K-P': 'Business', 'Life & Style, Fashion, Health': 'Health', 'World, Azad Jammu & Kashmir': 'World',
    'Life &amp; Style, TV': 'Entertainment', 'Sindh, Business, Health': 'Business', 'Sports, Business': 'Sports',
    'Punjab, Business, Lahore, Cities': 'Business', 'Punjab, Life & Style, Gossip': 'Entertainment',
    'Life & Style, Art and Books, TV': 'Entertainment', 'Sindh, Karachi, Cities': 'National', 'Sindh, archives': 'Other',
    'Pakistan, Islamabad': 'National', 'life and style': 'Entertainment', 'life and style, Music': 'Entertainment',
    'Islamabad': 'National', 'Pakistan, Health, Food': 'Health', 'Pakistan, Khyber-Pakhtunkhwa': 'National',
    'Music, Film, Theatre': 'Entertainment', 'Music': 'Entertainment', 'Cricket': 'Sports',
    'Film, Gossip, Bollywood': 'Entertainment', 'Khyber-Pakhtunkhwa': 'National', 'Sindh, Karachi': 'National',
    'Khyber-Pakhtunkhwa, Pakistan': 'National', 'World, Azad Jammu & Kashmir, Cities': 'World',
    'Pakistan, Islamabad, Cities': 'National', 'Gossip, TV, Music': 'Entertainment',
    'Music, Pakistan, Life & Style': 'Entertainment', 'Bollywood, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir': 'National', 'Art and Books, Film, Games': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Cities': 'National', 'Sindh, Hyderabad': 'National', 'Sindh, Cities': 'National',
    'Pakistan, Sindh, Cities': 'National', 'Art and Books, Film': 'Entertainment', 'Pakistan, Lahore': 'National',
    'Sports, TV, Gossip': 'Sports', 'Punjab, Pakistan, Lahore': 'National', 'Games': 'Other',
    'Khyber-Pakhtunkhwa, Swat': 'National', 'Pakistan, Sindh, Karachi, Business': 'Business',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National', 'Football': 'Sports', 'Pakistan, Peshawar': 'National',
    'TV, Sports': 'Sports', 'Khyber-Pakhtunkhwa, Cities, Peshawar': 'National', 'Rawalpindi': 'National',
    'Slideshows, World': 'World', 'Pakistan, Azad Jammu & Kashmir, Cities': 'National',
    'Pakistan, Cities, Khyber Pakhtunkhwa': 'National', 'Gossip, Film': 'Entertainment', 'Health, ADVICE': 'Health',
    'TV, Film': 'Entertainment', 'Pakistan, Lahore, Cities': 'National', 'Pakistan, Gwadar, Cities': 'National',
    'Rawalpindi, Punjab': 'National', 'Punjab, Rawalpindi': 'National', 'Khyber-Pakhtunkhwa, Peshawar': 'National',
    'Punjab, Lahore': 'National', 'Pakistan, Balochistan, Cities': 'National', 'Sindh, khairpur': 'National',
    'Islamabad, Pakistan, Cities': 'National', 'Pakistan, Khyber-Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Karachi': 'National', 'Khyber-Pakhtunkhwa, Abbottabad': 'National', 'TV, Gossip': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Gilgit-Baltistan': 'National', 'World, Pakistan': 'World',
    'Khyber-Pakhtunkhwa, Cities': 'National', 'Fashion, Gossip': 'Entertainment', 'Islamabad, World': 'World',
    'Punjab, Multan': 'National', 'Punjab, Faisalabad': 'National', 'Pakistan, Islamabad, Sindh': 'National',
    'Pakistan, Karachi, Cities': 'National', 'Islamabad, Business': 'Business', 'Sindh, Pakistan': 'National',
    'Pakistan, Islamabad, Punjab': 'National', 'Pakistan, Sindh, Karachi, Cities': 'National',
    'Pakistan, Cities, Azad Jammu & Kashmir': 'National', 'Health, Life & Style, ADVICE': 'Health',
    'Islamabad, Balochistan': 'National', 'Pakistan, Gilgit-Baltistan': 'National', 'Pakistan, Punjab, Cities': 'National',
    'Sindh, tharparkar': 'National', 'Business, Gilgit-Baltistan': 'Business', 'Khyber Pakhtunkhwa': 'National',
    'Film, TV, Life & Style, life and style': 'Entertainment', 'TV, Film, Life & Style': 'Entertainment',
    'Film, Life & Style': 'Entertainment', 'Pakistan, Cities, Lahore': 'National',
    'Life & Style, Gossip, Film': 'Entertainment', 'Film, Art and Books': 'Entertainment', 'Food, ADVICE, Health': 'Health',
    'Gossip, Fashion': 'Entertainment', 'Gossip, Life & Style': 'Entertainment',
    'Pakistan, Khyber-Pakhtunkhwa, Mardan': 'National', 'Pakistan, Punjab, Lahore': 'National',
    'Life & Style, Gossip, Fashion': 'Entertainment', 'Pakistan, Cities': 'National',
    'Islamabad, Rawalpindi': 'National', 'Islamabad, Pakistan': 'National', 'Pakistan, Islamabad, World': 'World',
    'Pakistan, Sindh, Nawabshah': 'National', 'Balochistan, Islamabad, Pakistan': 'National', 'Fashion, Life & Style': 'Entertainment',
    'Health, ADVICE, Life & Style': 'Health', 'Music, Life & Style': 'Entertainment',
    'Pakistan, Peshawar, Islamabad, Cities': 'National', 'Punjab, Gilgit-Baltistan': 'National', 'Health, World': 'Health',
    'Gossip, Life & Style, Music': 'Entertainment', 'Gossip, Music': 'Entertainment', 'Gossip, Film, TV': 'Entertainment',
    'Technology, Life & Style': 'Technology', 'Sindh, Pakistan, Karachi, Cities': 'National',
    'Gossip, TV, Film, Life & Style': 'Entertainment', 'Pakistan, World, Islamabad, Cities': 'World',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'Sindh, Nawabshah': 'National', 'Azad Jammu & Kashmir, Pakistan': 'National',
    'TV, Gossip, Life & Style': 'Entertainment', 'Film, TV, Life & Style': 'Entertainment',
    'Khyber Pakhtunkhwa, Peshawar, Sports': 'Sports', 'Film, Gossip, Life & Style': 'Entertainment',
    'Rawalpindi, Islamabad': 'National', 'Sindh, sukkur': 'National', 'Pakistan, World, Islamabad': 'World',
    'Pakistan, Sindh, Karachi, Islamabad': 'National', 'Business, Pakistan': 'Business',
    'ADVICE, Life & Style, Film': 'Entertainment', 'Film, Life & Style, TV, Gossip': 'Entertainment', 'Music, TV': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Islamabad': 'National', 'Music, Gossip, Life & Style': 'Entertainment',
    'Islamabad, Cities, Pakistan': 'National', 'Islamabad, Punjab': 'National', 'Health, Life & Style': 'Health',
    'Technology, World': 'Technology', 'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Hyderabad': 'National', 'Life & Style, TV, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Mardan': 'National', 'ADVICE, Life & Style': 'Health', 'Pakistan, Sindh, Balochistan': 'National',
    'Gossip, Life & Style, TV': 'Entertainment', 'Music, Film, Gossip': 'Entertainment',
    'Khyber Pakhtunkhwa, Islamabad': 'National', 'Pakistan, World, Azad Jammu & Kashmir': 'World',
    'World, Khyber Pakhtunkhwa, Peshawar, Islamabad, Pakistan': 'World', 'Rawalpindi, Pakistan, Business': 'Business',
    'Khyber Pakhtunkhwa, Peshawar': 'National', 'Sindh, dadu': 'National', 'Fashion, Gossip, Life & Style': 'Entertainment',
    'Pakistan, Karachi, Sindh': 'National', 'Pakistan, Balochistan, Quetta': 'National', 'TV, Life & Style': 'Entertainment',
    'Pakistan, Islamabad, World, Cities': 'World', 'Pakistan, Khyber Pakhtunkhwa, Cities': 'National',
    'Pakistan, Karachi, Sindh, Cities': 'National', 'Balochistan, Quetta': 'National', 'Pakistan, Rawalpindi': 'National',
    'Punjab, Film': 'Entertainment', 'Khyber Pakhtunkhwa, Swat': 'National', 'Balochistan, Gwadar': 'National',
    'TV, Life & Style, Gossip': 'Entertainment', 'Sindh, Karachi, Pakistan, Cities': 'National',
    'Pakistan, Sindh, Islamabad': 'National', 'Pakistan, Punjab, Islamabad, Cities': 'National',
    'Khyber Pakhtunkhwa, Nowshera': 'National', 'TV, Film, Gossip': 'Entertainment', 'Abbottabad': 'National',
    'Pakistan, Karachi, Peshawar': 'National', 'Pakistan, Khyber Pakhtunkhwa': 'National', 'Sports, Hockey': 'Sports',
    'Art and Books, Life & Style': 'Entertainment', 'Pakistan, World, Balochistan': 'World', 'Theatre': 'Entertainment',
    'Gossip, Film, Life & Style': 'Entertainment', 'Pakistan, Business, Khyber Pakhtunkhwa': 'Business',
    'Film, TV, Gossip, Life & Style': 'Entertainment', 'Life & Style, Sports': 'Sports',
    'Music, Art and Books, Life & Style, Film': 'Entertainment', 'Pakistan, Sindh, Karachi, Hyderabad, Cities': 'National',
    'Life & Style, ADVICE': 'Health',
    'Pakistan, Sindh, Punjab, Azad Jammu & Kashmir, Gilgit-Baltistan, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Karachi': 'National', 'Fashion, Music': 'Entertainment', 'Film, TV': 'Entertainment',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra, Nowshera, Charsadda': 'National', 'Fashion, Gossip, TV': 'Entertainment',
    'Karachi, Pakistan': 'National', 'Pakistan, Azad Jammu & Kashmir, Muzaffarabad': 'National',
    'Pakistan, Azad Jammu & Kashmir, World': 'World', 'Film, Music, Gossip': 'Entertainment',
    'Pakistan, Punjab, Rawalpindi': 'National', 'Pakistan, Lahore, Punjab': 'National', 'World, Newslab': 'World',
    'Karachi': 'National', 'Pakistan, Islamabad, Azad Jammu & Kashmir': 'National', 'Gossip, Fashion, Life & Style': 'Entertainment',
    'Pakistan, Rawalpindi, Karachi': 'National', 'Art and Books, Gossip, Life & Style': 'Entertainment',
    'Film, Theatre, Life & Style': 'Entertainment', 'Pakistan, Punjab, Islamabad': 'National',
    'Gilgit-Baltistan, Pakistan': 'National',
    'World, Business': 'Business',
    'Gilgit-Baltistan': 'National',
    'Ù¾Ø§Ú©Ø³Ø³ØªØ§Ù†': 'Other',
     'CITY & KARACHI': 'National',
    'HEADLINES & World': 'World',
    'HEADLINES & NATIONAL & World': 'World',
    'Food, Health': 'Health',
    'Gossip, TV': 'Entertainment',
    'Peshawar, Khyber Pakhtunkhwa, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Cities': 'National',
    'Life & Style, TV, Film': 'Entertainment',
    'Punjab, Pakistan': 'National',
    'Business, Sindh, Karachi': 'Business',
    'Lahore, Pakistan': 'National',
    'Pakistan, World, Business': 'Business',
    'Life & Style, Bollywood, Art and Books': 'Entertainment',
    'TV, Fashion': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Peshawar': 'National',
    'Pakistan, Punjab, Faisalabad': 'National',
    'Pakistan, Punjab, Gujranwala': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar': 'National',
    'Gossip, Bollywood': 'Entertainment',
    'Khyber Pakhtunkhwa, Pakistan, Charsadda': 'National',
    'Pakistan, World, Technology': 'Technology',
    'Pakistan, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Gilgit-Baltistan, Pakistan, Skardu': 'National',
    'Punjab, Pakistan, Gujranwala': 'National',
    'Sindh, Pakistan, Karachi': 'National',
    'Balochistan, Pakistan': 'National',
    'Pakistan, Sindh, sukkur': 'National',
    'Fashion, Film': 'Entertainment',
    'Pakistan, Azad Jammu & Kashmir, Rawalpindi': 'National',
    'Pakistan, Punjab, Multan': 'National',
    'Khyber Pakhtunkhwa, Pakistan': 'National',
    'Pakistan, Islamabad, Rawalpindi': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Mansehra': 'National',
    'World, Pakistan, Azad Jammu & Kashmir': 'World',
    'Pakistan, Gilgit-Baltistan, gilgit': 'National',
    'Sindh, Karachi, Pakistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Swat': 'National',
    'Pakistan, Islamabad, Karachi': 'National',
    'Pakistan, Sindh, Balochistan, Karachi': 'National',
    'Pakistan, Islamabad, Lahore': 'National',
    'Food, Life & Style': 'Food',
    'Film, Music': 'Entertainment',
    'Pakistan, World, Multan, Punjab': 'World',
    'Pakistan, Sindh, Balochistan, Islamabad, Khyber Pakhtunkhwa, Punjab': 'National',
    'Pakistan, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Art and Books, Fashion': 'Entertainment',
    'Pakistan, Gilgit-Baltistan, Abbottabad': 'National',
    'World, Pakistan, Islamabad': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Bannu, Dera Ismail Khan': 'National',
    'Pakistan, Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Sindh, Karachi, Quetta, Balochistan': 'National',
    'Life & Style, Business': 'Entertainment',
    'Pakistan, Punjab, Lahore, Islamabad': 'National',
    'Punjab, Pakistan, Rawalpindi': 'National',
    'Hockey': 'Sports',
    'Pakistan, Sindh, khairpur': 'National',
    'Pakistan, Islamabad, Sindh, Punjab, Balochistan, Khyber Pakhtunkhwa': 'National',
    'Punjab, Lahore, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Swat': 'National',
    'Pakistan, Punjab, Sindh, Balochistan, Islamabad, Gilgit-Baltistan, Azad Jammu & Kashmir': 'National',
    'World, Pakistan, Balochistan': 'World',
    'Pakistan, Quetta': 'National',
    'Multan, Pakistan, Punjab': 'National',
    'Pakistan, Cricket': 'Sports',
    'Gossip, Art and Books': 'Entertainment',
    'Karachi, Pakistan, Islamabad': 'National',
    'Sindh, Pakistan, Cities': 'National',
    'Karachi, Sindh': 'National',
    'Pakistan, Rawalpindi, Punjab': 'National',
    'Islamabad, Pakistan, Punjab': 'National',
    'Lahore': 'National',
    'Islamabad, Peshawar': 'National',
    'Pakistan, Islamabad, Punjab, Khyber Pakhtunkhwa, Balochistan': 'National',
    'Pakistan, Islamabad, Lahore, Punjab': 'National',
    'Sindh, Pakistan, sukkur': 'National',
    'Khyber Pakhtunkhwa, Peshawar, Pakistan': 'National',
    'Pakistan, Punjab, Sindh': 'National',
    'Islamabad, Karachi, Lahore, Pakistan': 'National',
    'Karachi, Pakistan, Sindh': 'National',
    'Pakistan, Peshawar, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Islamabad, Khyber Pakhtunkhwa, Balochistan, Azad Jammu & Kashmir, Gilgit-Baltistan, Sindh': 'National',
    'Islamabad, Rawalpindi, Pakistan': 'National',
    'Khyber Pakhtunkhwa, Gilgit-Baltistan': 'National',
    'Pakistan, Khyber Pakhtunkhwa, Nowshera': 'National',
    'Quetta, Pakistan, Balochistan': 'National',
    'Pakistan, Sindh, Karachi, Punjab, Lahore': 'National',
    'Life & Style, World': 'World',
    'Pakistan, Khyber Pakhtunkhwa, Peshawar, Islamabad': 'National',
    'Pakistan, Islamabad, Business': 'Business',
    'Islamabad, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Quetta, Balochistan': 'National',
    'Pakistan, Islamabad, Mirpur': 'National',
    'Islamabad, Pakistan, World': 'World',
    'Pakistan, Punjab, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Balochistan, Zhob': 'National',
    'Lahore, Punjab': 'National',
    'Pakistan, Muzaffarabad': 'National',
    'Pakistan, Sindh, Opinion': 'Opinion',
    'Pakistan, Khyber Pakhtunkhwa, Sindh': 'National',
    'Pakistan, Karachi, World': 'World',
    'Technology, Business, World': 'Business',
    'Pakistan, Islamabad, Gilgit-Baltistan': 'National',
    'Peshawar, Khyber Pakhtunkhwa': 'National',
    'Khyber Pakhtunkhwa, Pakistan, Islamabad': 'National',
    'Pakistan, Sindh, Life & Style': 'Entertainment',
    'Pakistan, Sindh, Hyderabad': 'National',
    'gilgit': 'National',
    'Sports, Tennis': 'Sports',
    'Sports, Football': 'Sports',
    'Pakistan, Gilgit-Baltistan, Islamabad': 'National',
    'TV, Music, Fashion': 'Entertainment',
    'Pakistan, Karachi, Sindh, Balochistan': 'National',
    'Fashion, TV, Music': 'Entertainment',
    'World, Pakistan, Sports': 'World',
    'Sports, World': 'Sports',
    'Pakistan, Sports, Cricket': 'Sports',
    'Pakistan, gilgit': 'National',
    'Film, Fashion': 'Entertainment',
    'TV, Film, Music': 'Entertainment',
    'TV, Music': 'Entertainment',
    'Film, TV, Art and Books': 'Entertainment',
    'Karachi, Life & Style': 'Entertainment',
    'Music, Film, TV': 'Entertainment',
    'Azad Jammu & Kashmir, Khyber Pakhtunkhwa': 'National',
    'Pakistan, Punjab, Opinion': 'Opinion',
    'Pakistan, Gwadar, Balochistan': 'National',
    'Film, Music, TV, Art and Books': 'Entertainment',
    'Gossip, TV, Film': 'Entertainment',
    'Azad Jammu & Kashmir, World': 'World',
    'Pakistan, Technology, Sindh, Karachi': 'Technology',
    'Pakistan, Sindh, tharparkar': 'National',
    'Pakistan, Sindh, Sanghar': 'National',
    'Fashion, TV': 'Entertainment',
    'Trends': 'Other',
    'Spotlight': 'Other',
    'Film, Art and Books, TV, Music': 'Entertainment',
    'POLITICS, Life & Style': 'Politics',
    'Spotlight, Music': 'Other',
    'Music, Spotlight': 'Other',
    'Gossip, Spotlight': 'Other',
    'Spotlight, TV': 'Other',
    'Spotlight, Gossip': 'Other',
    'Pakistan, Khyber Pakhtunkhwa, Mardan': 'National',
    'Spotlight, Film': 'Other',
    'Health, Spotlight': 'Health',
    'Spotlight, Fashion': 'Other',
    'Art and Books, Spotlight': 'Entertainment',
    'Pakistan, Balochistan, Derabugti': 'National',
    'Sports, Pakistan, Cricket': 'Sports',
    'Pakistan, Balochistan, Gwadar': 'National',
    'Pakistan, Life & Style, MOVIES': 'Entertainment',
    'Film, Spotlight': 'Entertainment',
    'Business, Life & Style': 'Business',
    'Sports, Pakistan, Punjab, Cricket': 'Sports',
    'Pakistan, Khyber Pakhtunkhwa, Dera Ismail Khan': 'National',
    'Bollywood': 'Entertainment',
    'Pakistan, Cricket, Sports': 'Sports',
    'Pakistan, Balochistan, Football, Gwadar': 'Sports',
    'Pakistan, Sindh, Thatta': 'National',
    'Karachi, Sindh, Pakistan': 'National',
    'Pakistan, Faisalabad, Punjab': 'National',
    'TV, Spotlight': 'Entertainment',
    'Technology, Health': 'Technology',
    'Health, Technology': 'Health',
    'Pakistan, Swat, Khyber Pakhtunkhwa': 'National',
    'Sports, Technology': 'Sports',
    'Pakistan, Sindh, Badin': 'National',
    'Pakistan, Hyderabad, Sindh': 'National',
    'Spotlight, Food': 'Food',
    'Bollywood, Gossip': 'Entertainment',
    'Pakistan, Larkana': 'National',
    'Pakistan, Jacobabad': 'National',
    'Art and Books, Theatre': 'Entertainment',
    'World, Azad Jammu & Kashmir, Pakistan': 'World',
    'Sports, Spotlight': 'Sports',
    'Spotlight, Sports': 'Sports',
    'Punjab, Islamabad': 'National',
    'Football, Sports': 'Sports',
    'Pakistan, Business, World': 'Business',
    'Pakistan, Business, Life & Style': 'Business',
    'Fashion, Spotlight': 'Entertainment',
    'Ù¾Ø§Ú©Ø³ØªØ§Ù†': 'Other',
    'pakistan, punjab, khyber-pakhtunkhwa': 'National',
    'pakistan, cities, khyber-pakhtunkhwa': 'National',
    'khyber-pakhtunkhwa, mardan': 'National',
    'khyber-pakhtunkhwa, peshawar, sports': 'Sports',
    'islamabad, khyber-pakhtunkhwa': 'National',
    'ù¾ø§ú©ø³øªø§ù\x86': 'National',
    'khyber pakhtunkhwa, pakistan, swat': 'National',

    'pakistan': 'National',
    'world': 'World',
    'sport': 'Sports',
    'business': 'Business',
    'prism': 'Other',
    'categories': 'Other',
    'nan': 'Other',
    ' president alvi dissolved the na under article 58 of the constitution.later in the evening': 'Other',
    ' and mostly behind the scenes. provincial governments collapse frequently. there is a stalemate': 'Other',
    'â jinnah would say': 'Other',
    'âhow ignorant art thou in thy pride of wisdom!&#8221;the writer is a lawyer and a former president of the sindh high court bar association. he tweets @salahmedpkpublished in dawn': 'Other',
    'âokay': 'Other',
    'mr ahmed believes âit isn&#8217;t possible to revamp the entire infrastructure. look at the enormity of the task. even $10bn wouldn&#8217;t suffice. only the gaps can be bridged.&#8221;the sc-mandated commission on water and sanitation in sindh made 100 recommendations to improve service delivery in these sectors. according to shahab usto': 'Other',
    'is elect a new prime minister. until they do so': 'Other',
    'similarly': 'Other',
    'seen as another act of transgressing institutional boundaries': 'Other'
}

# Lower-cased lookup table for the raw category strings. The column values are
# lower-cased AND stripped before the lookup, so a key that carries edge
# whitespace could never match; stripped variants are therefore added as well.
# Exact lower-cased keys are merged last so they keep priority on a collision.
lowercase_category_mapping = {
    **{k.lower().strip(): v for k, v in category_mapping.items()},
    **{k.lower(): v for k, v in category_mapping.items()},
}


def _add_mapped_categories(frame, mapping):
    """Normalise ``categories`` in place and add a ``mapped_categories`` column.

    Args:
        frame: DataFrame to annotate; it is modified in place.
        mapping: dict from normalised (lower-cased, stripped) category string
            to the coarse label.

    Returns:
        The same ``frame``. ``categories`` is cast to ``str``, lower-cased and
        stripped; ``mapped_categories`` holds the looked-up label, or
        ``'Other'`` when the value has no entry or the column is absent.
    """
    if 'categories' in frame.columns:
        # astype(str) turns missing values into the literal string 'nan'.
        frame['categories'] = frame['categories'].astype(str).str.lower().str.strip()
        frame['mapped_categories'] = frame['categories'].map(mapping).fillna('Other')
    else:
        frame['mapped_categories'] = 'Other'
    return frame


processed_dawn_chunks = []

for name, df in dataframes.items():
    if name == 'dawn (full-data)':
        # Dawn is re-read from disk in chunks rather than taken from `df`.
        # NOTE(review): any cleaning already applied to dataframes[name] in
        # memory is discarded by this re-read - confirm that is intended.
        print(f"Processing '{name}' in chunks for category mapping...")
        original_file_name = "dawn (full-data).csv"

        file_path_full = os.path.join(data_path, original_file_name)
        chunk_size = 10000

        for chunk in pd.read_csv(file_path_full, encoding="latin-1", index_col=False, chunksize=chunk_size):
            # Drop the unnamed columns, as the initial load does.
            chunk = chunk.loc[:, ~chunk.columns.str.contains('^Unnamed')]
            processed_dawn_chunks.append(_add_mapped_categories(chunk, lowercase_category_mapping))

        # Rebinding an existing key does not resize the dict, so this is safe
        # while iterating over dataframes.items().
        dataframes[name] = pd.concat(processed_dawn_chunks)
        print(f"Finished chunked processing for '{name}'. Reconstructed DataFrame size: {dataframes[name].shape}")

    else:
        # Every other frame is annotated in place.
        _add_mapped_categories(df, lowercase_category_mapping)

# Sanity check: list the labels produced and confirm that none are missing.
for name, df in dataframes.items():
    unique_mapped_categories = df['mapped_categories'].unique()
    print(f"\nUnique mapped categories in {name}:")
    print(unique_mapped_categories)
    nan_count = df['mapped_categories'].isnull().sum()
    print(f"Number of NaN values in 'mapped_categories' in {name}: {nan_count}")


Processing 'dawn (full-data)' in chunks for category mapping...


  for chunk in pd.read_csv(file_path_full, encoding="latin-1", index_col=False, chunksize=chunk_size):
  for chunk in pd.read_csv(file_path_full, encoding="latin-1", index_col=False, chunksize=chunk_size):
  for chunk in pd.read_csv(file_path_full, encoding="latin-1", index_col=False, chunksize=chunk_size):


Finished chunked processing for 'dawn (full-data)'. Reconstructed DataFrame size: (45077, 7)

Unique mapped categories in dawn (full-data):
['National' 'World' 'Sports' 'Business' 'Other']
Number of NaN values in 'mapped_categories' in dawn (full-data): 0


## **Combining datasets**

In [None]:
# Survey which calendar years the `date` column covers across all CSV files.
years_found = set()
for file in files:
    print(f"Scanning {file}...")
    file_path = os.path.join(data_path, file)
    # Only the `date` column is read, in chunks, to keep memory use low.
    date_chunks = pd.read_csv(file_path, usecols=['date'], encoding="latin-1", chunksize=50000)
    for chunk in date_chunks:
        # Unparseable dates become NaT and are dropped before collecting years.
        chunk['date'] = pd.to_datetime(chunk['date'], errors='coerce')
        chunk_years = chunk['date'].dt.year.dropna().unique()
        years_found.update(chunk_years)

years_found = sorted(int(year) for year in years_found if pd.notna(year))
print("\nYears found in dataset:", years_found)

In [None]:
# Read every source CSV and stack them into one frame, then save the result.
# NOTE(review): this reads the raw files again, so columns added in memory by
# earlier cells (e.g. `mapped_categories`) are only present here if they are
# stored in the CSVs themselves - confirm before relying on them below.
output_file_name = 'combined_dataset.csv'
output_file_path = os.path.join(data_path, output_file_name)
loaded_frames = []

print('Combining datasets...')
for file in files:
    # The output lives in the scanned folder and ends in .csv, so a re-run
    # would otherwise feed the previous combined file back in and duplicate
    # every row.
    if file == output_file_name:
        print(f'Skipping previous output {file}')
        continue
    file_path = os.path.join(data_path, file)
    try:
        df = pd.read_csv(file_path, encoding='latin-1')
        loaded_frames.append(df)
        print(f'Successfully combined {file}')
    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f'Error combining {file}: {e}')

# A single concat avoids re-copying the accumulated rows for every file.
# pd.concat raises on an empty list, so fall back to an empty frame.
combined_df = pd.concat(loaded_frames, ignore_index=True) if loaded_frames else pd.DataFrame()

combined_df.to_csv(output_file_path, index=False)
print(f'Combined dataset saved to: {output_file_path}')


In [None]:
# Preview the first rows, then show column dtypes and non-null counts.
display(combined_df.head())
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appended a stray "None" line to the output.
combined_df.info()


In [None]:
# List the distinct values of `mapped_categories` in the combined frame.
# NOTE(review): combined_df is built from the CSV files on disk, so this
# assumes the column is stored in those files - confirm.
combined_df['mapped_categories'].unique()

In [None]:
# List the distinct values of the `source` column in the combined frame.
combined_df['source'].unique()

In [None]:
# (rows, columns) of the combined frame.
combined_df.shape