In [364]:
import pandas as pd
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages


In [None]:
# Shared pytrends client: the cells below call build_payload(),
# related_queries() and interest_by_region() on this one object.
pytrend = TrendReq()

In [None]:
# Seed keywords that drive the whole analysis.
searchwords = [
    'web developer',
    'website development service',
]

# Register the seed keywords with the client, then fetch their related
# queries. The result is iterated further down as a mapping whose values
# are indexed by 'top' and 'rising'.
pytrend.build_payload(kw_list=searchwords)
related_queries = pytrend.related_queries()

In [None]:
# Extracting terms
# Collect every related query string, from both the 'top' and the 'rising'
# ranking, for each seed keyword.
terms_list = []

for queries_type, rankings in related_queries.items():
    if rankings is None:
        continue
    for ranking_name in ('top', 'rising'):
        # Each ranking is checked on its own: a keyword can have an entry
        # while one (or both) of its rankings is missing, and indexing a
        # missing ranking with ['query'] would raise a TypeError.
        ranking_df = rankings.get(ranking_name)
        if ranking_df is None:
            continue
        terms_list.extend(ranking_df['query'].tolist())


In [None]:
# Cleaning terms
# A term is dropped when it contains any of these substrings (matched
# case-insensitively) or when its length falls outside the allowed window.
words_to_filter = [
    'job', 'salary', 'india', 'internship', 'intern', 'student',
    'chat', 'what', 'series', 'course', 'roadmap',
]
min_length = 6   # exclusive lower bound on len(term)
max_length = 30  # inclusive upper bound on len(term)


def _is_relevant(term):
    """Return True when `term` has no filtered substring and an allowed length."""
    lowered = term.lower()
    has_filtered_word = any(word in lowered for word in words_to_filter)
    return not has_filtered_word and min_length < len(term) <= max_length


# Clean the terms list
cleaned_terms = list(filter(_is_relevant, terms_list))


In [None]:
# Appending to searchwords
# Merge the cleaned related queries with the seed keywords and de-duplicate.
# The result is sorted because a bare list(set(...)) has no guaranteed order,
# which would let the composition of the batches below change from one
# kernel session to the next.
searchword = sorted(set(cleaned_terms + searchwords))

# Split the list into smaller lists with a maximum of 5 elements each
split_searchword = [searchword[i:i+5] for i in range(0, len(searchword), 5)]

# Query interest-by-region once per batch and keep the resulting frames in a
# list so they are concatenated a single time instead of re-copying an
# ever-growing DataFrame on every iteration.
# NOTE(review): interest scores are presumably scaled within each request, so
# values coming from different batches may not be directly comparable - confirm.
region_frames = []
for word_list in split_searchword:
    pytrend.build_payload(kw_list=word_list)
    region_df = pytrend.interest_by_region()
    # Skip batches that returned no rows so they cannot disturb the
    # 'geoName' index name that the groupby below relies on.
    if not region_df.empty:
        region_frames.append(region_df)

# Stack the per-batch frames along rows (raises ValueError if every batch
# came back empty, since there is then nothing to analyse).
final_df = pd.concat(region_frames, ignore_index=False)

# Aggregate the data by taking the maximum value for each column for each country
final_df = final_df.groupby('geoName').max()

# Print or use the final DataFrame
print(final_df)


In [None]:
# Cleaning final_df
# Cells with no value for a country/keyword pair are treated as zero interest.
final_df = final_df.fillna(0)

In [None]:
# Extract top countries for each column
top_countries = {}          # column name -> frame of its highest-ranked rows
common_countries = set()    # union of every country that made any ranking
num = 50                    # rows kept per column
for column in final_df.columns:
    ranked = final_df.sort_values(by=column, ascending=False).reset_index()
    # A column whose highest value is zero carries no signal; skip it.
    if ranked.iloc[0][column] == 0:
        continue
    leaders = ranked.head(num)
    common_countries.update(leaders['geoName'])
    top_countries[column] = leaders



In [None]:
# Export each DataFrame as CSV

# for column, df_top in top_countries.items():
#     filename = f"{column}_top_countries.csv"
#     df_top.to_csv(filename)
#     print(f"Exported {filename}")

In [365]:
# Write one bar chart per entry of top_countries into a single multi-page PDF.
with PdfPages('top_countries_plots.pdf') as pdf:
    for column, country_df in top_countries.items():
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(country_df['geoName'], country_df[column])
        ax.set_title(f"Top Countries for {column}")
        ax.set_xlabel('Country')
        ax.set_ylabel('Interest')
        # Slant the country names so neighbouring labels do not collide.
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()

        pdf.savefig(fig)   # append this figure as a new page
        plt.close(fig)     # free the figure before building the next one

# Print a message indicating the PDF file has been saved
print("Plots have been saved to top_countries_plots.pdf")


Plots have been saved to top_countries_plots.pdf


In [None]:
# Average interest per country across all keyword columns.
# 'geoName' is the index of final_df, not a column (other cells call
# reset_index() to get it back), so every column counts toward the
# denominator and nothing must be subtracted from the column count.
# 'Average' itself is excluded so that re-running this cell does not fold a
# previously computed average into the new sum and count.
interest_columns = [col for col in final_df.columns if col != 'Average']

# Calculate the sum of values in each row across the keyword columns
row_sum = final_df[interest_columns].sum(axis=1)
num_columns = len(interest_columns)
final_df['Average'] = row_sum / num_columns

# Highest average interest first.
sorted_final = final_df.sort_values(by='Average', ascending=False)

In [None]:
# Persist the table sorted by descending 'Average' to Final.csv
# (relative path, so it is written to the current working directory).
sorted_final.to_csv("Final.csv")

In [367]:
# Bar chart of the countries with the highest 'Average', saved as a one-page PDF.
number = 50  # how many of the leading rows of sorted_final to plot
leading_rows = sorted_final.head(number).reset_index()

# NOTE(review): the output filename is spelled 'countires'; kept unchanged
# because other tooling may already look for this exact path.
with PdfPages('final_countires.pdf') as pdf:
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.bar(leading_rows['geoName'], leading_rows['Average'])
    ax.set_title('Average Interest by Country')
    ax.set_xlabel('Country')
    ax.set_ylabel('Average Interest')
    # Slant the country names so neighbouring labels do not collide.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)