# Import Necessary Libraries:

In [None]:
!pip install requests
import pandas as pd
import requests
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation


# Data Collection (Part 1):
The script begins by collecting data from the Parliament SPARQL endpoint (https://api.parliament.uk/sparql).
A SPARQL query is formulated to fetch information about inquiries submitted by Members of Parliament (MPs) in the House of Commons from January 2023 to September 2023.
The API is then queried using the requests.post method, and the response is stored.
# Handling API Response (Part 1):
The code checks if the API response status is 200 (OK), indicating a successful request.
The JSON response is then processed to extract relevant information like MP details, constituencies, and inquiry texts.
A Pandas DataFrame (mp_inquiries_df_part1) is created to organize the collected data.

In [None]:
# Step 1: Data Collection from Parliament API
#
# NOTE(review): the query text is sent exactly as written below. It declares
# no PREFIX for the `:` and `xsd:` namespaces it uses, and it selects
# ?inquiryText without binding it in the WHERE clause. Confirm the query
# against the endpoint's ontology before relying on the results.
sparql_query_part1 = """
SELECT ?mp ?constituency ?inquiryText
WHERE {
  ?mp a :MemberOfParliament;
      :represents ?constituency;
      :makesInquiry ?inquiry;
      :inquiryDate ?date.
  FILTER (?date >= "2023-01-01"^^xsd:date && ?date <= "2023-09-30"^^xsd:date)
}
"""

api_url = "https://api.parliament.uk/sparql"
headers = {
    # This content type means the request body is the raw, unencoded query.
    "Content-Type": "application/sparql-query",
    # Ask for the standard SPARQL JSON results type, keeping plain JSON as a
    # fallback for endpoints that only advertise that.
    "Accept": "application/sparql-results+json, application/json",
}
params = {
    # The query itself travels in the body (see below), not in the URL.
    "format": "json",
}

# Send the query as the POST body so the request matches its declared
# Content-Type; a timeout keeps an unresponsive endpoint from hanging the
# notebook indefinitely.
response_part1 = requests.post(
    api_url,
    headers=headers,
    params=params,
    data=sparql_query_part1.encode("utf-8"),
    timeout=30,
)

if response_part1.status_code == 200:
    data_part1 = response_part1.json()

    # Flatten each result binding into one row. A binding that lacks a
    # variable contributes an empty string for that field.
    mp_inquiries_data_part1 = [
        {
            "MP": result.get("mp", {}).get("value", ""),
            "Constituency": result.get("constituency", {}).get("value", ""),
            "InquiryText": result.get("inquiryText", {}).get("value", ""),
        }
        for result in data_part1.get("results", {}).get("bindings", [])
    ]

    # Name the columns explicitly so the frame still has them when the query
    # returns no rows; without this the 'Constituency' lookup below raises
    # KeyError on an empty result set.
    mp_inquiries_df_part1 = pd.DataFrame(
        mp_inquiries_data_part1,
        columns=["MP", "Constituency", "InquiryText"],
    )

    # Continue with the rest of the code (Region Categorization)

    def map_constituency_to_region(constituency):
        """Return the UK nation whose name appears in *constituency*.

        Matching is a case-insensitive substring test; names that mention
        none of Scotland, Northern Ireland or Wales default to "England".
        """
        lowered = constituency.casefold()
        for region in ("Scotland", "Northern Ireland", "Wales"):
            if region.casefold() in lowered:
                return region
        return "England"

    mp_inquiries_df_part1['Region'] = mp_inquiries_df_part1['Constituency'].apply(map_constituency_to_region)

    # Now you have a DataFrame with 'Region' column
    print(mp_inquiries_df_part1.head())
else:
    # Report the failure instead of skipping silently. This deliberately does
    # not raise: the next cell builds a placeholder frame when this one did
    # not create 'mp_inquiries_df_part1'.
    print(
        f"Parliament API request failed with HTTP status "
        f"{response_part1.status_code}; 'mp_inquiries_df_part1' was not created."
    )



# Step 1: Region Categorization
Region Categorization (Part 2):
Define a function (map_constituency_to_region) that assigns each constituency to a region by checking whether the constituency name contains a region name, defaulting to England when none is found.
Apply the mapping function to create a new 'Region' column in the DataFrame.

In [None]:
# Fallback for when the data-collection cell did not leave a
# 'mp_inquiries_df_part1' frame behind in the notebook's global namespace.
if 'mp_inquiries_df_part1' not in globals():
    # Three placeholder constituencies so the following cells can still run.
    mp_inquiries_df_part1 = pd.DataFrame(
        ['Edinburgh South', 'Belfast East', 'Manchester Central'],
        columns=['Constituency'],
    )

# Define a function to map constituencies to regions
def map_constituency_to_region(constituency):
    """Return the UK nation whose name appears in *constituency*.

    The check is a case-insensitive substring match against "Scotland",
    "Northern Ireland" and "Wales", in that order. Anything that mentions
    none of them is reported as "England".

    This is an illustrative heuristic only: a constituency name such as
    "Edinburgh South" does not contain the word "Scotland", so it falls
    through to the default. Accurate results would need a lookup table from
    constituency to nation.

    Args:
        constituency: Constituency name or description as a string.

    Returns:
        One of "Scotland", "Northern Ireland", "Wales" or "England".
    """
    lowered = constituency.casefold()
    for region in ("Scotland", "Northern Ireland", "Wales"):
        if region.casefold() in lowered:
            return region
    return "England"  # Default to England

# Derive a region for every row from its constituency and store the result
# in a new 'Region' column.
constituency_names = mp_inquiries_df_part1['Constituency']
mp_inquiries_df_part1['Region'] = constituency_names.apply(map_constituency_to_region)

# Preview the first rows, which now include the 'Region' column.
print(mp_inquiries_df_part1.head(5))



# Step 2: LDA Topic Modeling
LDA Topic Modeling (Part 2):
Utilize scikit-learn to perform Latent Dirichlet Allocation (LDA) topic modeling on the inquiries' text data.
Preprocess the text data using a basic CountVectorizer.
Display the top words for each topic identified by LDA.

In [None]:

# Pick the column to model. The topics are meant to describe the inquiry
# text, so use 'InquiryText' whenever it holds at least one non-blank entry;
# otherwise fall back to 'Constituency' so the cell still runs on a frame
# that carries no inquiry text.
text_column = 'Constituency'
if 'InquiryText' in mp_inquiries_df_part1.columns:
    candidate_text = mp_inquiries_df_part1['InquiryText'].fillna('').astype(str)
    if candidate_text.str.strip().ne('').any():
        text_column = 'InquiryText'
if text_column != 'InquiryText':
    print("No inquiry text available; running LDA on 'Constituency' instead.")

# Missing values are replaced with empty strings so every document is a str.
documents = mp_inquiries_df_part1[text_column].fillna('').astype(str)

# Use a basic CountVectorizer
vectorizer = CountVectorizer(stop_words='english', max_features=1000)
inquiry_text_matrix = vectorizer.fit_transform(documents)

# Apply LDA
num_topics = 5  # You can adjust the number of topics based on your analysis
lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
lda.fit(inquiry_text_matrix)

# Display the top words for each topic
num_top_words = 10  # How many of the highest-weighted words to list per topic
feature_names = vectorizer.get_feature_names_out()
for topic_idx, topic in enumerate(lda.components_):
    # argsort is ascending, so reverse it to put the heaviest words first.
    top_indices = topic.argsort()[::-1][:num_top_words]
    top_words = [feature_names[i] for i in top_indices]
    print(f"Topic #{topic_idx + 1}: {', '.join(top_words)}")
