In [1]:
import pandas as pd
import numpy as np
import string
import scipy
import sklearn
import spacy
import nltk
import re
import os

In [4]:
from nltk import word_tokenize
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
#from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer, util

In [5]:
def read_csv_files_in_folder(folder_path):
    """
    Read all CSV files in a folder and concatenate them into a single DataFrame.

    Parameters:
    - folder_path (str): Path to the folder containing CSV files.

    Returns:
    - pd.DataFrame: Concatenated DataFrame.
    """
    # List all files in the folder
    all_files = os.listdir(folder_path)

    # List to store DataFrames
    dfs = []

    # Loop through CSV files and read them
    for file in all_files:
        if file.endswith('.csv'):
            file_path = os.path.join(folder_path, file)
            df = pd.read_csv(file_path)
            dfs.append(df)

    # Concatenate DataFrames into one
    concatenated_df = pd.concat(dfs, ignore_index=True)
    
    return concatenated_df

In [6]:
def text_embed(text_col_name, dataframe, vector_col_name):
    model = SentenceTransformer("thenlper/gte-small")
    dataframe[text_col_name] = dataframe[text_col_name].astype(str)
    dataframe[vector_col_name] = dataframe[text_col_name].apply(lambda x: model.encode(x))
    return dataframe

In [7]:
# Set random seed for reproducibility
np.random.seed(42)

# Function to find the matching label for each corpus
def match_corpus_to_label(corpus_embedding, label_embeddings, threshold=0.85):
    similarities = cosine_similarity([corpus_embedding], label_embeddings)
    max_similarity = np.max(similarities)
    
    if max_similarity >= threshold:
        # Get the index of the matching label
        label_index = np.argmax(similarities)
        return label_index
    else:
        return None

In [8]:
# Set random seed for reproducibility
np.random.seed(42)

def assign_subtopic(corpus_df, corpus_embeddings, subtopic_df, subtopic_embeddings, subtopic_index_reference, subtopic_output):
    # Iterate over each row in sample_data
    for index, row in corpus_df.iterrows():
        corpus_embedding = row[corpus_embeddings]  # Assuming 'QA_vector' is the column with corpus embeddings
    
        # If the embedding is a sequence or array, stack them vertically and compute the mean
        if isinstance(corpus_embedding, list):
            corpus_embedding = np.mean(np.vstack(corpus_embedding), axis=0)

        # If the embedding is still 2D, flatten it to 1D
        if len(corpus_embedding.shape) > 1:
            corpus_embedding = corpus_embedding.flatten()

        matching_label_index = match_corpus_to_label(corpus_embedding, np.vstack(subtopic_embeddings.values))

        # If a matching label is found, assign it to a new column 'AssignedLabel'
        if matching_label_index is not None:
            corpus_df.at[index, subtopic_output] = subtopic_df.at[matching_label_index, subtopic_index_reference]

In [9]:
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()

    # Remove numbers, symbols, and punctuation (except for the case where 2 follows CO)
    text = re.sub(r'[\d' + re.escape(string.punctuation) + '](?<![cC][oO]2)', '', text)

    # Tokenize the text
    tokens = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]

    # Join the tokens back into a single string
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

# SMART CITIES

In [10]:
smart_cities_df = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Supercleaned demo dataset ready for Topic mod 21 03.xlsx", 
                       sheet_name=1)
smart_cities_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,
...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D..."
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D..."
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D..."
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D..."


In [11]:
smart_cities_df.columns

Index(['Macrotopic ', 'Question', 'Answer', 'Label', 'Source Title ',
       'Source Url', 'Source Author '],
      dtype='object')

In [12]:
smart_cities_df['QA'] = smart_cities_df['Question'] + ' ' + smart_cities_df['Answer']
smart_cities_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...
...,...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the projected trend of Indonesia's GDP...
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How do landowners in India show preference het...
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the purpose of SKI Jakarta Governor Re...


In [13]:
# smart cities subtopic
folder_path = r'C:\Users\RedHat\Downloads\Tiiqu\Subtopics'
smcts_subtopic_df = read_csv_files_in_folder(folder_path)
smcts_subtopic_df

Unnamed: 0,Terminology,Description,Keywords
0,Sensor Networks for Urban Air Quality,Sensor networks for urban air quality refer to...,"Sensors, Air Quality, Urban, Monitoring, Pollu..."
1,IoT-enabled Air Quality Monitoring,IoT-enabled Air Quality Monitoring refers to t...,"IoT, Air Quality Monitoring, Real-Time Data, S..."
2,Mobile Air Quality Monitoring Stations,Mobile Air Quality Monitoring Stations refer t...,"Mobile, Air, Quality, Monitoring, Stations, Se..."
3,Real-time Pollution Mapping,Real-time Pollution Mapping refers to the proc...,"Real-time, Pollution, Mapping, Geospatial, Tra..."
4,Indoor Air Quality Monitoring Systems,Indoor Air Quality Monitoring Systems refer to...,"Indoor Air Quality, Monitoring Systems, Air Qu..."
...,...,...,...
595,Smart Water Grids,A Smart Water Grid refers to the integration o...,"Smart Water Grid, IoT, Data Analytics, Artific..."
596,Weather Forecasting for Water Management,Weather forecasting for water management refer...,"Weather Forecasting, Water Management, Meteoro..."
597,Community-based Water Quality Monitoring,Community-based Water Quality Monitoring refer...,"Community, Water Quality, Monitoring, Sustaina..."
598,Water Infrastructure Resilience,Water Infrastructure Resilience refers to the ...,"Resilience, Water Infrastructure, Disruption, ..."


In [14]:
# text embedding for smart cities
text_embed('QA', smart_cities_df, 'QA_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...,"[-0.012195684, 0.007873172, 0.006466941, 0.029..."
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...,"[-0.019286802, 0.043021444, 0.012284794, -0.01..."
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...,"[-0.02020133, 0.025334762, -0.002407805, -0.01..."
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...,"[-0.051888514, 0.01082537, 0.017671604, -0.043..."
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...,"[-0.0022440273, -0.008420247, 0.017052963, -0...."
...,...,...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...,"[-0.014814504, 0.00951411, 0.023957323, -0.050..."
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the projected trend of Indonesia's GDP...,"[-0.05541449, -0.0032068617, 0.071445845, -0.0..."
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How do landowners in India show preference het...,"[-0.005819576, 0.008612114, 0.025815105, -0.02..."
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the purpose of SKI Jakarta Governor Re...,"[-0.02840907, -0.032079037, 0.06578252, -0.069..."


In [15]:
# subtopic embedding for smart cities
text_embed('Description', smcts_subtopic_df, 'Des_vector')

Unnamed: 0,Terminology,Description,Keywords,Des_vector
0,Sensor Networks for Urban Air Quality,Sensor networks for urban air quality refer to...,"Sensors, Air Quality, Urban, Monitoring, Pollu...","[-0.032511596, -0.008433948, 0.06403352, -0.01..."
1,IoT-enabled Air Quality Monitoring,IoT-enabled Air Quality Monitoring refers to t...,"IoT, Air Quality Monitoring, Real-Time Data, S...","[-0.019570377, -0.007127084, 0.05570755, -0.02..."
2,Mobile Air Quality Monitoring Stations,Mobile Air Quality Monitoring Stations refer t...,"Mobile, Air, Quality, Monitoring, Stations, Se...","[-0.016010625, 0.009993818, 0.063600145, -0.00..."
3,Real-time Pollution Mapping,Real-time Pollution Mapping refers to the proc...,"Real-time, Pollution, Mapping, Geospatial, Tra...","[-0.030291691, -0.019748587, 0.058397368, -0.0..."
4,Indoor Air Quality Monitoring Systems,Indoor Air Quality Monitoring Systems refer to...,"Indoor Air Quality, Monitoring Systems, Air Qu...","[-0.018200023, -0.003171656, 0.051177938, -0.0..."
...,...,...,...,...
595,Smart Water Grids,A Smart Water Grid refers to the integration o...,"Smart Water Grid, IoT, Data Analytics, Artific...","[-0.013377284, 0.0023783373, 0.047986627, -0.0..."
596,Weather Forecasting for Water Management,Weather forecasting for water management refer...,"Weather Forecasting, Water Management, Meteoro...","[-0.030780733, -0.030379986, 0.06936532, -0.00..."
597,Community-based Water Quality Monitoring,Community-based Water Quality Monitoring refer...,"Community, Water Quality, Monitoring, Sustaina...","[-0.0028319797, -0.019709097, 0.024996435, -0...."
598,Water Infrastructure Resilience,Water Infrastructure Resilience refers to the ...,"Resilience, Water Infrastructure, Disruption, ...","[-0.023378167, -0.053442758, 0.027936874, -0.0..."


In [16]:
# Subtopic assignment for smart cities dataset
assign_subtopic(smart_cities_df, 'QA_vector', smcts_subtopic_df, smcts_subtopic_df['Des_vector'], 'Terminology', 'Assigned Subtopic')

In [17]:
# PREPROCESSING

In [18]:
smart_cities_df['QA_prep'] = smart_cities_df['QA'].apply(preprocess_text)

In [19]:
smcts_subtopic_df['Des_prep'] = smcts_subtopic_df['Description'].apply(preprocess_text)

In [20]:
# text embedding for preprocessed smart cities corpus
text_embed('QA_prep', smart_cities_df, 'QA_prep_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...,"[-0.012195684, 0.007873172, 0.006466941, 0.029...",Grid Voltage Stability Solutions,operational strategies like bid price control ...,"[-0.022449002, 0.018022422, 0.009411752, 0.048..."
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...,"[-0.019286802, 0.043021444, 0.012284794, -0.01...",Waste-to-Energy Solutions,purpose powertohydrogen ph technology ph techn...,"[-0.026798876, 0.043908987, 0.032166984, -0.00..."
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...,"[-0.02020133, 0.025334762, -0.002407805, -0.01...",Renewable Energy Integration,ph contribute integration variable renewable e...,"[-0.052468862, 0.017500032, 0.029401012, -0.01..."
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...,"[-0.051888514, 0.01082537, 0.017671604, -0.043...",,surplus alone insufficient economical px syste...,"[-0.054230493, 0.025611764, 0.034356646, -0.02..."
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...,"[-0.0022440273, -0.008420247, 0.017052963, -0....",,best route ph terms displacing diesel heavy co...,"[-0.013488555, -0.013958169, 0.04522113, -0.02..."
...,...,...,...,...,...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...,"[-0.014814504, 0.00951411, 0.023957323, -0.050...",,proturan enter agrarian dan tata huangbaden pe...,"[-0.014260622, -0.010944142, 0.051180813, -0.0..."
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the projected trend of Indonesia's GDP...,"[-0.05541449, -0.0032068617, 0.071445845, -0.0...",,projected trend indonesias gdp per capita acco...,"[-0.041214325, 0.00021408785, 0.07849644, -0.0..."
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How do landowners in India show preference het...,"[-0.005819576, 0.008612114, 0.025815105, -0.02...",,landowners india show preference heterogeneity...,"[-0.019319465, 0.017689066, 0.02719766, -0.023..."
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the purpose of SKI Jakarta Governor Re...,"[-0.02840907, -0.032079037, 0.06578252, -0.069...",,purpose ski jakarta governor regulation number...,"[-0.016116215, -0.017762946, 0.05998256, -0.06..."


In [21]:
# subtopic embedding for preprocessed smart cities description
text_embed('Des_prep', smcts_subtopic_df, 'Des_prep_vector')

Unnamed: 0,Terminology,Description,Keywords,Des_vector,Des_prep,Des_prep_vector
0,Sensor Networks for Urban Air Quality,Sensor networks for urban air quality refer to...,"Sensors, Air Quality, Urban, Monitoring, Pollu...","[-0.032511596, -0.008433948, 0.06403352, -0.01...",sensor networks urban air quality refer use mu...,"[-0.025221739, -0.002512642, 0.067848094, -0.0..."
1,IoT-enabled Air Quality Monitoring,IoT-enabled Air Quality Monitoring refers to t...,"IoT, Air Quality Monitoring, Real-Time Data, S...","[-0.019570377, -0.007127084, 0.05570755, -0.02...",iotenabled air quality monitoring refers use i...,"[-0.034565482, -0.006861557, 0.062069375, -0.0..."
2,Mobile Air Quality Monitoring Stations,Mobile Air Quality Monitoring Stations refer t...,"Mobile, Air, Quality, Monitoring, Stations, Se...","[-0.016010625, 0.009993818, 0.063600145, -0.00...",mobile air quality monitoring stations refer s...,"[-0.016696014, 0.007394806, 0.07728041, -0.001..."
3,Real-time Pollution Mapping,Real-time Pollution Mapping refers to the proc...,"Real-time, Pollution, Mapping, Geospatial, Tra...","[-0.030291691, -0.019748587, 0.058397368, -0.0...",realtime pollution mapping refers process trac...,"[-0.029291991, -0.011465249, 0.07051751, -0.01..."
4,Indoor Air Quality Monitoring Systems,Indoor Air Quality Monitoring Systems refer to...,"Indoor Air Quality, Monitoring Systems, Air Qu...","[-0.018200023, -0.003171656, 0.051177938, -0.0...",indoor air quality monitoring systems refer de...,"[-0.015027424, 0.017110107, 0.057737336, 0.000..."
...,...,...,...,...,...,...
595,Smart Water Grids,A Smart Water Grid refers to the integration o...,"Smart Water Grid, IoT, Data Analytics, Artific...","[-0.013377284, 0.0023783373, 0.047986627, -0.0...",smart water grid refers integration advanced t...,"[-0.023247557, 0.007856621, 0.049678247, -0.01..."
596,Weather Forecasting for Water Management,Weather forecasting for water management refer...,"Weather Forecasting, Water Management, Meteoro...","[-0.030780733, -0.030379986, 0.06936532, -0.00...",weather forecasting water management refers us...,"[-0.039074812, -0.04721483, 0.07345538, -0.004..."
597,Community-based Water Quality Monitoring,Community-based Water Quality Monitoring refer...,"Community, Water Quality, Monitoring, Sustaina...","[-0.0028319797, -0.019709097, 0.024996435, -0....",communitybased water quality monitoring refers...,"[-0.0035583482, -0.018783882, 0.06017778, -0.0..."
598,Water Infrastructure Resilience,Water Infrastructure Resilience refers to the ...,"Resilience, Water Infrastructure, Disruption, ...","[-0.023378167, -0.053442758, 0.027936874, -0.0...",water infrastructure resilience refers ability...,"[-0.026645694, -0.03607002, 0.034129072, -0.03..."


In [22]:
#Subtopic assignment for smart cities dataset
assign_subtopic(smart_cities_df, 'QA_prep_vector', smcts_subtopic_df, smcts_subtopic_df['Des_prep_vector'], 'Terminology', 'Assigned Subtopic(Preprocessed)')

In [23]:
smart_cities_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed)
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...,"[-0.012195684, 0.007873172, 0.006466941, 0.029...",Grid Voltage Stability Solutions,operational strategies like bid price control ...,"[-0.022449002, 0.018022422, 0.009411752, 0.048...",Voltage and Reactive Power Control
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...,"[-0.019286802, 0.043021444, 0.012284794, -0.01...",Waste-to-Energy Solutions,purpose powertohydrogen ph technology ph techn...,"[-0.026798876, 0.043908987, 0.032166984, -0.00...",Grid-Connected Energy Storage
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...,"[-0.02020133, 0.025334762, -0.002407805, -0.01...",Renewable Energy Integration,ph contribute integration variable renewable e...,"[-0.052468862, 0.017500032, 0.029401012, -0.01...",Energy Storage Integration
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...,"[-0.051888514, 0.01082537, 0.017671604, -0.043...",,surplus alone insufficient economical px syste...,"[-0.054230493, 0.025611764, 0.034356646, -0.02...",Energy Storage Optimization
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...,"[-0.0022440273, -0.008420247, 0.017052963, -0....",,best route ph terms displacing diesel heavy co...,"[-0.013488555, -0.013958169, 0.04522113, -0.02...",Energy-efficient Transportation Systems
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...,"[-0.014814504, 0.00951411, 0.023957323, -0.050...",,proturan enter agrarian dan tata huangbaden pe...,"[-0.014260622, -0.010944142, 0.051180813, -0.0...",Public Transit Optimization
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the projected trend of Indonesia's GDP...,"[-0.05541449, -0.0032068617, 0.071445845, -0.0...",,projected trend indonesias gdp per capita acco...,"[-0.041214325, 0.00021408785, 0.07849644, -0.0...",
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How do landowners in India show preference het...,"[-0.005819576, 0.008612114, 0.025815105, -0.02...",,landowners india show preference heterogeneity...,"[-0.019319465, 0.017689066, 0.02719766, -0.023...",
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the purpose of SKI Jakarta Governor Re...,"[-0.02840907, -0.032079037, 0.06578252, -0.069...",,purpose ski jakarta governor regulation number...,"[-0.016116215, -0.017762946, 0.05998256, -0.06...",


In [25]:
# Export to CSV
smart_cities_df.to_csv('Smart Cities SUBTOPIC ALLOCATION with New Dataset---85percent.csv')

In [24]:
subs = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Smart cities macro topic, topics, sub-topics.xlsx", sheet_name=1)
subs

Unnamed: 0,Urban Mobility,Intelligent Transportation Systems,Smart Grids,Energy Management,Sustainable Infrastructure,Digital Governance,IoT in Cities,Smart Buildings,Waste Management,Water Management,Civic Engagement,Public Safety,Environmental Monitoring,Urban Planning Analytics,Smart Street Lighting,Urban Green Spaces,E-Government Services,Air Quality Monitoring,Smart Parking Solutions,Urban autonomous vehicles
0,Public Transit Optimization,Adaptive Traffic Signal Control,Advanced Metering Infrastructure (AMI),Energy Consumption Monitoring,Green Building Design,E-Government Services,Smart Sensors Deployment,Building Energy Management Systems (BEMS),Smart Bin Monitoring Systems,Smart Water Metering Systems,Digital Participatory Platforms,Emergency Response Systems,Air Quality Monitoring Systems,Spatial Data Analysis for Urban Planning,Intelligent LED Street Lighting,Smart Park Design and Management,Digital Citizen Identity Management,Sensor Networks for Urban Air Quality,IoT-enabled Parking Space Monitoring,Self-Driving Public Transportation
1,Micro-Mobility Solutions,Vehicle-to-Everything (V2X) Communication,Demand Response Systems,Smart Grid Integration,Eco-friendly Construction Materials,Digital Citizen Participation,IoT-enabled Smart Lighting Systems,Intelligent Lighting Systems,Waste Sorting Technologies,IoT-enabled Water Quality Monitoring,Crowdsourced Urban Planning,Smart Surveillance Technologies,IoT-enabled Environmental Sensors,Predictive Modeling for Population Growth,Motion-Sensing Street Lights,Green Roofs and Vertical Gardens,Online Government Portals,IoT-enabled Air Quality Monitoring,Mobile Apps for Parking Availability,Autonomous Ride-Sharing Services
2,Traffic Flow Management,Cooperative Adaptive Cruise Control (CACC),Smart Metering Technology,Demand-Side Management,Urban Green Spaces,Open Data Initiatives,Connected Intelligent Buildings,Occupancy Sensors and Analytics,RFID-based Waste Tracking,Leak Detection and Prevention,E-Government Services Accessibility,Predictive Policing Models,Smart Weather Stations,Traffic Flow Analytics for Transportation Plan...,Adaptive Brightness Control,Intelligent Irrigation Systems for Parks,Mobile Government Applications,Mobile Air Quality Monitoring Stations,Automated Parking Guidance Systems,Urban Autonomous Taxis
3,Intelligent Traffic Signal Systems,Connected Vehicles Networks,Distribution Management Systems (DMS),Automated Demand Response,Low-Impact Development (LID),Civic Engagement Platforms,IoT in Urban Agriculture,"Smart HVAC (Heating, Ventilation, and Air Cond...",IoT-enabled Waste Collection,Automated Water Distribution Systems,Citizen Feedback Systems,Real-time Crime Mapping,Real-time Pollution Tracking,Land Use Pattern Analysis,IoT-enabled Street Light Monitoring,Urban Biodiversity Conservation,E-Government Service Accessibility,Real-time Pollution Mapping,Sensor-equipped Parking Spaces,Automated Bus Systems
4,Pedestrian-Friendly Infrastructure,Traffic Surveillance Cameras,Microgrid Solutions,Building Energy Management Systems (BEMS),Smart Water Management,Smart City Portals,Smart Waste Management,Automated Temperature and Climate Control,Automated Waste Collection Systems,Real-time Water Consumption Analytics,Community Decision-Making Apps,Smart Street Lighting for Safety,Water Quality Monitoring Networks,Real-time Infrastructure Utilization Monitoring,Remote Management of Street Lights,Sensor Networks for Green Space Monitoring,Electronic Document Submission,Indoor Air Quality Monitoring Systems,Real-time Parking Occupancy Data,Smart Traffic Management for Autonomous Vehicles
5,Bike-sharing Programs,Incident Detection and Management,Grid Modernization Initiatives,Smart Home Energy Management,Sustainable Transportation Networks,Digital Identity Management,Environmental Monitoring with IoT,Energy-Efficient Building Materials,Intelligent Waste Disposal Units,Smart Irrigation Systems,Open Data Initiatives for Civic Transparency,Public Safety Drones,Soil Health Monitoring,Demographic Data Analytics,Solar-powered Street Lighting,Smart Benches with Charging Stations,Online Payment and Transaction Systems,Remote Sensing for Air Quality Assessment,Smart Parking Payment Solutions,Vehicle-to-Everything (V2X) Communication
6,Multi-modal Transport Integration,Real-time Traffic Information Systems,Energy Storage Integration,Real-Time Energy Analytics,Intelligent Traffic Management,Transparent Government Operations,Smart Parking Solutions,IoT-enabled Building Automation,Waste-to-Energy Conversion,Flood Prediction and Early Warning Systems,Online Voting Platforms,Intelligent Traffic Management for Emergency V...,Biodiversity Monitoring,Social and Economic Impact Assessment,Energy-efficient Street Light Design,Interactive Outdoor Fitness Equipment,Smart Taxation Systems,Citizen-Engaged Air Quality Data Collection,Dynamic Pricing for Parking Spaces,Autonomous Delivery Vehicles
7,Smart Parking Systems,Roadside Sensor Networks,Smart Grid Cybersecurity,Energy-Efficient HVAC Systems,Energy-Efficient Street Lighting,Cybersecurity in Governance,IoT for Traffic Management,Intelligent Fire Detection and Prevention,Smart Recycling Bins,Intelligent Stormwater Management,Mobile Apps for Civic Engagement,Gunshot Detection Systems,Urban Heat Island Monitoring,GIS (Geographic Information System) for Urban ...,Smart Street Light Poles with Integrated Sensors,Virtual Green Spaces for Indoor Environments,E-Voting Platforms,Smart HVAC Systems for Indoor Air Quality,Integration with Navigation Apps,Last-Mile Autonomous Transport Solutions
8,Ride-sharing Services,Vehicle Detection Technologies,Renewable Energy Integration,Predictive Energy Modeling,Waste-to-Energy Solutions,Smart Legislation and Regulation,Intelligent Public Transportation,Smart Security and Access Control,E-waste Management Solutions,Cloud-based Water Data Platforms,Social Media Integration for Citizen Input,Wearable Tech for First Responders,Noise Pollution Monitoring,Public Transit Ridership Analytics,Predictive Maintenance for Street Lights,Community Gardens with Smart Irrigation,Open Data Platforms for Government Transparency,Wearable Air Quality Sensors,Parking Reservation Platforms,Pedestrian Detection and Safety Systems
9,Traffic Congestion Modeling,Smart Parking Guidance Systems,Electric Vehicle Charging Infrastructure,Smart Lighting Solutions,Circular Economy Practices,Data Privacy Policies,IoT in Healthcare for Smart Cities,Indoor Air Quality Monitoring,Hazardous Waste Monitoring,Water Conservation Apps for Citizens,Virtual Town Halls,Video Analytics for Crowd Monitoring,Radiation Monitoring Systems,Accessibility and Connectivity Modeling,Connected Street Lighting Networks,Mobile Apps for Green Space Navigation,Digital Social Services Delivery,Air Quality Alerts and Notifications,Geofencing for Parking Management,Integration with Public Transportation Networks


In [25]:
UM = subs['Urban Mobility'].values
ITS = subs['Intelligent Transportation Systems'].values
SG = subs['Smart Grids'].values
EM = subs['Energy Management'].values
SI = subs['Sustainable Infrastructure'].values
DG = subs['Digital Governance'].values
IiC = subs['IoT in Cities'].values
SB = subs['Smart Buildings'].values
WM = subs['Waste Management'].values
WaM = subs['Water Management'].values
CE = subs['Civic Engagement'].values
PS = subs['Public Safety'].values
EvM = subs['Environmental Monitoring'].values
UPA = subs['Urban Planning Analytics'].values
SSL = subs['Smart Street Lighting'].values
UGS = subs['Urban Green Spaces'].values
EGS = subs['E-Government Services'].values
AQM = subs['Air Quality Monitoring'].values
SPS = subs['Smart Parking Solutions'].values
UAV = subs['Urban autonomous vehicles'].values

In [26]:
def topic_allocation(sub_topic):
    if pd.isnull(sub_topic):
        return "NA"     
    if any(keyword in sub_topic for keyword in UM):
        return "Urban Mobility"
    elif any(keyword in sub_topic for keyword in ITS):
        return "Intelligent Transportation Systems"
    elif any(keyword in sub_topic for keyword in SG):
        return "Smart Grids"
    elif any(keyword in sub_topic for keyword in EM):
        return "Energy Management"
    elif any(keyword in sub_topic for keyword in SI):
        return "Sustainable Infrastructure"
    elif any(keyword in sub_topic for keyword in DG):
        return "Digital Governance"
    elif any(keyword in sub_topic for keyword in IiC):
        return "IoT in Cities"
    elif any(keyword in sub_topic for keyword in SB):
        return "Smart Buildings"
    elif any(keyword in sub_topic for keyword in WM):
        return "Waste Management"
    elif any(keyword in sub_topic for keyword in WaM):
        return "Water Management"
    elif any(keyword in sub_topic for keyword in CE):
        return "Civic Engagement"
    elif any(keyword in sub_topic for keyword in PS):
        return "Public Safety"
    elif any(keyword in sub_topic for keyword in EvM):
        return "Environmental Monitoring"
    elif any(keyword in sub_topic for keyword in UPA):
        return "Urban Planning Analytics"
    elif any(keyword in sub_topic for keyword in SSL):
        return "Smart Street Lighting"
    elif any(keyword in sub_topic for keyword in UGS):
        return "Urban Green Spaces"
    elif any(keyword in sub_topic for keyword in EGS):
        return "E-Government Services"
    elif any(keyword in sub_topic for keyword in AQM):
        return "Air Quality Monitoring"
    elif any(keyword in sub_topic for keyword in SPS):
        return "Smart Parking Solutions"
    elif any(keyword in sub_topic for keyword in UAV):
        return "Urban autonomous vehicles"
    else:
        return "Other"

In [27]:
smart_cities_df['Topic'] = smart_cities_df['Assigned Subtopic(Preprocessed)'].apply(topic_allocation)

In [28]:
smart_cities_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed),Topic
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...,"[-0.012195684, 0.007873172, 0.006466941, 0.029...",Grid Voltage Stability Solutions,operational strategies like bid price control ...,"[-0.022449002, 0.018022422, 0.009411752, 0.048...",Voltage and Reactive Power Control,Smart Grids
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...,"[-0.019286802, 0.043021444, 0.012284794, -0.01...",Waste-to-Energy Solutions,purpose powertohydrogen ph technology ph techn...,"[-0.026798876, 0.043908987, 0.032166984, -0.00...",Grid-Connected Energy Storage,Smart Grids
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...,"[-0.02020133, 0.025334762, -0.002407805, -0.01...",Renewable Energy Integration,ph contribute integration variable renewable e...,"[-0.052468862, 0.017500032, 0.029401012, -0.01...",Energy Storage Integration,Smart Grids
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...,"[-0.051888514, 0.01082537, 0.017671604, -0.043...",,surplus alone insufficient economical px syste...,"[-0.054230493, 0.025611764, 0.034356646, -0.02...",Energy Storage Optimization,Energy Management
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...,"[-0.0022440273, -0.008420247, 0.017052963, -0....",,best route ph terms displacing diesel heavy co...,"[-0.013488555, -0.013958169, 0.04522113, -0.02...",Energy-efficient Transportation Systems,Energy Management
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...,"[-0.014814504, 0.00951411, 0.023957323, -0.050...",,proturan enter agrarian dan tata huangbaden pe...,"[-0.014260622, -0.010944142, 0.051180813, -0.0...",Public Transit Optimization,Urban Mobility
6578,Smart Cities,What is the projected trend of Indonesia's GDP...,"According to O'Neill (2023), the Gross Domesti...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the projected trend of Indonesia's GDP...,"[-0.05541449, -0.0032068617, 0.071445845, -0.0...",,projected trend indonesias gdp per capita acco...,"[-0.041214325, 0.00021408785, 0.07849644, -0.0...",,
6579,Smart Cities,How do landowners in India show preference het...,"According to Patel et al. (2020), landowners i...",Analysis,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How do landowners in India show preference het...,"[-0.005819576, 0.008612114, 0.025815105, -0.02...",,landowners india show preference heterogeneity...,"[-0.019319465, 0.017689066, 0.02719766, -0.023...",,
6580,Smart Cities,What is the purpose of SKI Jakarta Governor Re...,SKI Jakarta Governor Regulation Number 15 of 2...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the purpose of SKI Jakarta Governor Re...,"[-0.02840907, -0.032079037, 0.06578252, -0.069...",,purpose ski jakarta governor regulation number...,"[-0.016116215, -0.017762946, 0.05998256, -0.06...",,


In [29]:
# Export unstripped dataset to CSV
smart_cities_df.to_csv('Smart Cities Unstripped Dataset---85percent.csv')

In [30]:
smart_cities_df_striped = smart_cities_df[smart_cities_df['Topic'] != 'NA']
smart_cities_df_striped 

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed),Topic
0,Smart cities,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Analysis,,,,How do operational strategies like bid price c...,"[-0.012195684, 0.007873172, 0.006466941, 0.029...",Grid Voltage Stability Solutions,operational strategies like bid price control ...,"[-0.022449002, 0.018022422, 0.009411752, 0.048...",Voltage and Reactive Power Control,Smart Grids
1,Smart cities,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Science and Tech,,,,What is the purpose of Power-to-Hydrogen (P2H)...,"[-0.019286802, 0.043021444, 0.012284794, -0.01...",Waste-to-Energy Solutions,purpose powertohydrogen ph technology ph techn...,"[-0.026798876, 0.043908987, 0.032166984, -0.00...",Grid-Connected Energy Storage,Smart Grids
2,Smart cities,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Science and Tech,,,,How does P2H contribute to the integration of ...,"[-0.02020133, 0.025334762, -0.002407805, -0.01...",Renewable Energy Integration,ph contribute integration variable renewable e...,"[-0.052468862, 0.017500032, 0.029401012, -0.01...",Energy Storage Integration,Smart Grids
3,Smart cities,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Why is it almost impossible for grid operators...,,,,Why is surplus ARE alone insufficient for an e...,"[-0.051888514, 0.01082537, 0.017671604, -0.043...",,surplus alone insufficient economical px syste...,"[-0.054230493, 0.025611764, 0.034356646, -0.02...",Energy Storage Optimization,Energy Management
4,Smart cities,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Strategy,,,,What is the best route for P2H in terms of dis...,"[-0.0022440273, -0.008420247, 0.017052963, -0....",,best route ph terms displacing diesel heavy co...,"[-0.013488555, -0.013958169, 0.04522113, -0.02...",Energy-efficient Transportation Systems,Energy Management
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6573,Smart Cities,Why is it important for participants to carefu...,Participants should carefully consider each al...,Strategy,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",Why is it important for participants to carefu...,"[0.0038841115, 0.03513701, 0.020665823, -0.060...",,important participants carefully consider alte...,"[-0.010255851, 0.022660147, 0.021899108, -0.05...",Urban Green Spaces,Sustainable Infrastructure
6575,Smart Cities,"According to Ali et al. (2021), what is the re...",Ali et al. (2021) highlighted the dynamics of ...,Science and tech,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...","According to Ali et al. (2021), what is the re...","[-0.0128415665, 0.002763815, 0.099401414, -0.0...",Policy Initiatives for Air Quality Improvement...,according ali et al relationship transit orien...,"[-0.015362982, 0.003872255, 0.101223275, -0.01...",Sustainable Transportation Networks,Sustainable Infrastructure
6576,Smart Cities,What is the significance of transit oriented d...,Transit Oriented Development (TOD) is signific...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",What is the significance of transit oriented d...,"[-0.0069669327, -0.0042390767, 0.053100187, -0...",Public Transit Optimization,significance transit oriented development tod ...,"[-0.0009637197, -0.016547738, 0.07069088, -0.0...",Public Transit Optimization,Urban Mobility
6577,Smart Cities,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Management,"Smart urbanism, citizen-centric approaches and...",https://www.sciencedirect.com/science/article/...,"I Mayan Koto Surya wan, Rachman Guyana, Ivan D...",How does the Proturan Enter Agrarian dan Tata ...,"[-0.014814504, 0.00951411, 0.023957323, -0.050...",,proturan enter agrarian dan tata huangbaden pe...,"[-0.014260622, -0.010944142, 0.051180813, -0.0...",Public Transit Optimization,Urban Mobility


In [31]:
smart_cities_df_striped = smart_cities_df_striped[['Question', 'Answer', 'Macrotopic ', 'Topic',
                                                   'Assigned Subtopic(Preprocessed)', 'Source Url']]
smart_cities_df_striped

Unnamed: 0,Question,Answer,Macrotopic,Topic,Assigned Subtopic(Preprocessed),Source Url
0,How do operational strategies like bid price c...,Both bid price control and wind forecast contr...,Smart cities,Smart Grids,Voltage and Reactive Power Control,
1,What is the purpose of Power-to-Hydrogen (P2H)...,P2H technology is used to store excess electri...,Smart cities,Smart Grids,Grid-Connected Energy Storage,
2,How does P2H contribute to the integration of ...,P2H technology helps to integrate ARE into the...,Smart cities,Smart Grids,Energy Storage Integration,
3,Why is surplus ARE alone insufficient for an e...,Surplus ARE alone is insufficient for an econo...,Smart cities,Energy Management,Energy Storage Optimization,
4,What is the best route for P2H in terms of dis...,The best route for P2H is to displace diesel i...,Smart cities,Energy Management,Energy-efficient Transportation Systems,
...,...,...,...,...,...,...
6573,Why is it important for participants to carefu...,Participants should carefully consider each al...,Smart Cities,Sustainable Infrastructure,Urban Green Spaces,https://www.sciencedirect.com/science/article/...
6575,"According to Ali et al. (2021), what is the re...",Ali et al. (2021) highlighted the dynamics of ...,Smart Cities,Sustainable Infrastructure,Sustainable Transportation Networks,https://www.sciencedirect.com/science/article/...
6576,What is the significance of transit oriented d...,Transit Oriented Development (TOD) is signific...,Smart Cities,Urban Mobility,Public Transit Optimization,https://www.sciencedirect.com/science/article/...
6577,How does the Proturan Enter Agrarian dan Tata ...,The guidelines provided in the Proturan Enter ...,Smart Cities,Urban Mobility,Public Transit Optimization,https://www.sciencedirect.com/science/article/...


In [32]:
smart_cities_df_striped.rename(columns={'Assigned Subtopic(Preprocessed)': 'Subtopic'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  smart_cities_df_striped.rename(columns={'Assigned Subtopic(Preprocessed)': 'Subtopic'}, inplace=True)


In [95]:
smart_cities_df_striped.to_csv('Smart Cities for SUPERCLEANED dataset 23-3-2024.csv')

In [33]:
smart_cities_df.to_csv('Unstriped Smart Cities for SUPERCLEANED dataset 23-3-2024.csv')

# CLEAN ENERGY

In [56]:
afc_energy_df = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Supercleaned demo dataset ready for Topic mod 21 03.xlsx", 
                       sheet_name=2)
afc_energy_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author
0,Affordable and Clean Energy,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Science and Tech,,,
1,Affordable and Clean Energy,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Management,,,
2,Affordable and Clean Energy,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Management,,,
3,Affordable and Clean Energy,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Analysis,,,
4,Affordable and Clean Energy,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Strategy,,,
...,...,...,...,...,...,...,...
1660,Affordable and Clean Energy,How did solar irradiance in Spain compare to t...,Solar irradiance in Spain was 20% to 30% above...,Analysis,,,
1661,Affordable and Clean Energy,What weather conditions in Europe were respons...,The negative phase of the North Atlantic Oscil...,Analysis,,,
1662,Affordable and Clean Energy,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Science and Tech,,,
1663,Affordable and Clean Energy,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Analysis,,,


In [57]:
afc_energy_df['QA'] = afc_energy_df['Question'] + ' ' + afc_energy_df['Answer']
afc_energy_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA
0,Affordable and Clean Energy,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Science and Tech,,,,What is the main advantage of liquid air energ...
1,Affordable and Clean Energy,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Management,,,,How can the efficiency of LIES be improved? Th...
2,Affordable and Clean Energy,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Management,,,,How can the Round Trip Efficiency (RTE) be imp...
3,Affordable and Clean Energy,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Analysis,,,,How does the Solar-LAES-SE system with decoupl...
4,Affordable and Clean Energy,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Strategy,,,,Why is implementing renewable energy-based ind...
...,...,...,...,...,...,...,...,...
1660,Affordable and Clean Energy,How did solar irradiance in Spain compare to t...,Solar irradiance in Spain was 20% to 30% above...,Analysis,,,,How did solar irradiance in Spain compare to t...
1661,Affordable and Clean Energy,What weather conditions in Europe were respons...,The negative phase of the North Atlantic Oscil...,Analysis,,,,What weather conditions in Europe were respons...
1662,Affordable and Clean Energy,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Science and Tech,,,,Based on the model constructed using data from...
1663,Affordable and Clean Energy,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Analysis,,,,Despite the high share of wind energy on the I...


In [58]:
sub_df1 = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\clean energy topic and subtopics (2).xlsx", 
                                 sheet_name='subtopic description 1-15')
sub_df2 = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\clean energy topic and subtopics (2).xlsx", 
                                 sheet_name='Subtopics description (15-30)')
afce_subtopic_df = pd.concat([sub_df1, sub_df2], axis=0).reset_index(drop=True)
afce_subtopic_df

Unnamed: 0,Topic,Subtopic,Description,Keywords
0,Solar Photovoltaic Systems,Solar Photovoltaic Technologies,Solar photovoltaic (PV) technologies encompass...,"Solar PV, Photovoltaic, Renewable"
1,Solar Photovoltaic Systems,Grid-Tied Solar Systems,Grid-tied solar systems are photovoltaic insta...,"Grid-tied, Solar system, Net metering"
2,Solar Photovoltaic Systems,Off-Grid Solar Systems,Off-grid solar systems operate independently o...,"Off-grid, Stand-alone, Remote"
3,Solar Photovoltaic Systems,Solar Panel Manufacturing Processes,Solar panel manufacturing processes involve th...,"Manufacturing, Production, Efficiency"
4,Solar Photovoltaic Systems,Solar Inverter Technologies,Solar inverter technologies convert direct cur...,"Inverter, Conversion, Efficiency"
...,...,...,...,...
551,,Carbon Offsets and Biodiversity Conservation,Carbon Offsets and Biodiversity Conservation p...,"habitat restoration, ecosystem services"
552,,Carbon Footprint Reduction Programs,Carbon Footprint Reduction Programs implement ...,"greenhouse gas emissions, environmental perfor..."
553,,Carbon Offset Programs for Small Businesses,Carbon Offset Programs for Small Businesses en...,"small enterprises, corporate responsibility"
554,,Carbon Neutral Events and Conferences,Carbon Neutral Events and Conferences aim to m...,"sustainable event planning, eco-friendly pract..."


In [59]:
# text embedding for clean energy corpus
text_embed('QA', afc_energy_df, 'QA_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector
0,Affordable and Clean Energy,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Science and Tech,,,,What is the main advantage of liquid air energ...,"[-0.004522498, -0.037984867, 0.036875945, 0.04..."
1,Affordable and Clean Energy,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Management,,,,How can the efficiency of LIES be improved? Th...,"[-0.046298273, -0.0027031552, 0.05756366, 0.00..."
2,Affordable and Clean Energy,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Management,,,,How can the Round Trip Efficiency (RTE) be imp...,"[-0.055615943, -0.003967587, 0.057388723, 0.01..."
3,Affordable and Clean Energy,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Analysis,,,,How does the Solar-LAES-SE system with decoupl...,"[-0.06440674, 0.0287099, 0.071825825, 0.034501..."
4,Affordable and Clean Energy,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Strategy,,,,Why is implementing renewable energy-based ind...,"[-0.039428297, 0.0035552864, 0.008631619, -0.0..."
...,...,...,...,...,...,...,...,...,...
1660,Affordable and Clean Energy,How did solar irradiance in Spain compare to t...,Solar irradiance in Spain was 20% to 30% above...,Analysis,,,,How did solar irradiance in Spain compare to t...,"[-0.023560774, -0.0006682332, 0.030136807, 0.0..."
1661,Affordable and Clean Energy,What weather conditions in Europe were respons...,The negative phase of the North Atlantic Oscil...,Analysis,,,,What weather conditions in Europe were respons...,"[-0.05348125, 0.013398707, 0.0406969, -0.02428..."
1662,Affordable and Clean Energy,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Science and Tech,,,,Based on the model constructed using data from...,"[-0.034322396, 0.03786056, 0.03920884, 0.00370..."
1663,Affordable and Clean Energy,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Analysis,,,,Despite the high share of wind energy on the I...,"[-0.01959131, 0.008530454, -0.0013339912, -0.0..."


In [60]:
# subtopic embedding for clean energy
text_embed('Description', afce_subtopic_df, 'Des_vector')

Unnamed: 0,Topic,Subtopic,Description,Keywords,Des_vector
0,Solar Photovoltaic Systems,Solar Photovoltaic Technologies,Solar photovoltaic (PV) technologies encompass...,"Solar PV, Photovoltaic, Renewable","[-0.039536417, 0.046666723, 0.041797113, -0.00..."
1,Solar Photovoltaic Systems,Grid-Tied Solar Systems,Grid-tied solar systems are photovoltaic insta...,"Grid-tied, Solar system, Net metering","[-0.04853681, 0.024900572, 0.015712507, -0.008..."
2,Solar Photovoltaic Systems,Off-Grid Solar Systems,Off-grid solar systems operate independently o...,"Off-grid, Stand-alone, Remote","[-0.01492235, 0.045314435, 0.022829114, -0.004..."
3,Solar Photovoltaic Systems,Solar Panel Manufacturing Processes,Solar panel manufacturing processes involve th...,"Manufacturing, Production, Efficiency","[-0.042553786, 0.034881733, 0.046635456, -0.02..."
4,Solar Photovoltaic Systems,Solar Inverter Technologies,Solar inverter technologies convert direct cur...,"Inverter, Conversion, Efficiency","[-0.03541326, 0.034002777, 0.024631288, 0.0234..."
...,...,...,...,...,...
551,,Carbon Offsets and Biodiversity Conservation,Carbon Offsets and Biodiversity Conservation p...,"habitat restoration, ecosystem services","[-0.025664518, 0.013868737, 0.047231656, 6.798..."
552,,Carbon Footprint Reduction Programs,Carbon Footprint Reduction Programs implement ...,"greenhouse gas emissions, environmental perfor...","[-0.03491686, 0.030108815, 0.06351311, 0.00390..."
553,,Carbon Offset Programs for Small Businesses,Carbon Offset Programs for Small Businesses en...,"small enterprises, corporate responsibility","[-0.024124589, 0.01781019, 0.04456468, -0.0321..."
554,,Carbon Neutral Events and Conferences,Carbon Neutral Events and Conferences aim to m...,"sustainable event planning, eco-friendly pract...","[-0.058755726, 0.030933961, 0.030578673, -0.01..."


In [61]:
# Subtopic assignment for clean energy dataset
assign_subtopic(afc_energy_df, 'QA_vector', afce_subtopic_df, afce_subtopic_df['Des_vector'], 'Subtopic', 'Assigned Subtopic')

In [62]:
afc_energy_df['QA_prep'] = afc_energy_df['QA'].apply(preprocess_text)

In [63]:
afce_subtopic_df['Des_prep'] = afce_subtopic_df['Description'].apply(preprocess_text)

In [64]:
# text embedding for preprocessed clean energy corpus
text_embed('QA_prep', afc_energy_df, 'QA_prep_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector
0,Affordable and Clean Energy,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Science and Tech,,,,What is the main advantage of liquid air energ...,"[-0.004522498, -0.037984867, 0.036875945, 0.04...",Compressed Air Energy Storage (CAES),main advantage liquid air energy storage lies ...,"[0.0001286425, -0.0215348, 0.03705814, 0.03461..."
1,Affordable and Clean Energy,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Management,,,,How can the efficiency of LIES be improved? Th...,"[-0.046298273, -0.0027031552, 0.05756366, 0.00...",,efficiency lies improved efficiency lies impro...,"[-0.05188921, 0.008030583, 0.05519354, 0.01358..."
2,Affordable and Clean Energy,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Management,,,,How can the Round Trip Efficiency (RTE) be imp...,"[-0.055615943, -0.003967587, 0.057388723, 0.01...",Geothermal Power Plant Efficiency,round trip efficiency rte improved less lnglae...,"[-0.054776583, -0.004049266, 0.050917335, 0.01..."
3,Affordable and Clean Energy,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Analysis,,,,How does the Solar-LAES-SE system with decoupl...,"[-0.06440674, 0.0287099, 0.071825825, 0.034501...",Solar Inverter Technologies,solarlaesse system decoupled charging discharg...,"[-0.059689205, 0.033795923, 0.055474304, 0.027..."
4,Affordable and Clean Energy,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Strategy,,,,Why is implementing renewable energy-based ind...,"[-0.039428297, 0.0035552864, 0.008631619, -0.0...",Industrial Energy Management,implementing renewable energybased industrial ...,"[-0.04669596, 0.0006737985, 0.03521731, -0.023..."
...,...,...,...,...,...,...,...,...,...,...,...,...
1660,Affordable and Clean Energy,How did solar irradiance in Spain compare to t...,Solar irradiance in Spain was 20% to 30% above...,Analysis,,,,How did solar irradiance in Spain compare to t...,"[-0.023560774, -0.0006682332, 0.030136807, 0.0...",,solar irradiance spain compare rest europe apr...,"[-0.03631249, 0.0058690365, 0.034752488, 0.027..."
1661,Affordable and Clean Energy,What weather conditions in Europe were respons...,The negative phase of the North Atlantic Oscil...,Analysis,,,,What weather conditions in Europe were respons...,"[-0.05348125, 0.013398707, 0.0406969, -0.02428...",,weather conditions europe responsible lowpress...,"[-0.07678591, -0.0010808985, 0.036194973, -0.0..."
1662,Affordable and Clean Energy,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Science and Tech,,,,Based on the model constructed using data from...,"[-0.034322396, 0.03786056, 0.03920884, 0.00370...",Social and Economic Benefits of Wind Farms,based model constructed using data irish grid ...,"[-0.044054758, 0.023186581, 0.034040935, -0.00..."
1663,Affordable and Clean Energy,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Analysis,,,,Despite the high share of wind energy on the I...,"[-0.01959131, 0.008530454, -0.0013339912, -0.0...",Small-Scale Wind Turbines,despite high share wind energy irish grid achi...,"[-0.039006848, 0.020299856, 0.0010144438, 0.00..."


In [65]:
# subtopic embedding for preprocessed clean energy description
text_embed('Des_prep', afce_subtopic_df, 'Des_prep_vector')

Unnamed: 0,Topic,Subtopic,Description,Keywords,Des_vector,Des_prep,Des_prep_vector
0,Solar Photovoltaic Systems,Solar Photovoltaic Technologies,Solar photovoltaic (PV) technologies encompass...,"Solar PV, Photovoltaic, Renewable","[-0.039536417, 0.046666723, 0.041797113, -0.00...",solar photovoltaic pv technologies encompass v...,"[-0.04240937, 0.022927899, 0.070242435, -0.019..."
1,Solar Photovoltaic Systems,Grid-Tied Solar Systems,Grid-tied solar systems are photovoltaic insta...,"Grid-tied, Solar system, Net metering","[-0.04853681, 0.024900572, 0.015712507, -0.008...",gridtied solar systems photovoltaic installati...,"[-0.06875977, 0.017120644, 0.0038857667, -0.01..."
2,Solar Photovoltaic Systems,Off-Grid Solar Systems,Off-grid solar systems operate independently o...,"Off-grid, Stand-alone, Remote","[-0.01492235, 0.045314435, 0.022829114, -0.004...",offgrid solar systems operate independently el...,"[-0.010985108, 0.01931899, 0.030053258, -0.004..."
3,Solar Photovoltaic Systems,Solar Panel Manufacturing Processes,Solar panel manufacturing processes involve th...,"Manufacturing, Production, Efficiency","[-0.042553786, 0.034881733, 0.046635456, -0.02...",solar panel manufacturing processes involve fa...,"[-0.043765012, 0.041055497, 0.054430105, -0.02..."
4,Solar Photovoltaic Systems,Solar Inverter Technologies,Solar inverter technologies convert direct cur...,"Inverter, Conversion, Efficiency","[-0.03541326, 0.034002777, 0.024631288, 0.0234...",solar inverter technologies convert direct cur...,"[-0.050924208, 0.030663075, 0.028110225, 0.008..."
...,...,...,...,...,...,...,...
551,,Carbon Offsets and Biodiversity Conservation,Carbon Offsets and Biodiversity Conservation p...,"habitat restoration, ecosystem services","[-0.025664518, 0.013868737, 0.047231656, 6.798...",carbon offsets biodiversity conservation proje...,"[-0.021346644, 0.0083611095, 0.051733196, 0.00..."
552,,Carbon Footprint Reduction Programs,Carbon Footprint Reduction Programs implement ...,"greenhouse gas emissions, environmental perfor...","[-0.03491686, 0.030108815, 0.06351311, 0.00390...",carbon footprint reduction programs implement ...,"[-0.03552793, 0.027457634, 0.070954815, -0.005..."
553,,Carbon Offset Programs for Small Businesses,Carbon Offset Programs for Small Businesses en...,"small enterprises, corporate responsibility","[-0.024124589, 0.01781019, 0.04456468, -0.0321...",carbon offset programs small businesses enable...,"[-0.0234186, 0.018056037, 0.04026378, -0.02994..."
554,,Carbon Neutral Events and Conferences,Carbon Neutral Events and Conferences aim to m...,"sustainable event planning, eco-friendly pract...","[-0.058755726, 0.030933961, 0.030578673, -0.01...",carbon neutral events conferences aim minimize...,"[-0.051341422, 0.03127225, 0.0458548, -0.00431..."


In [67]:
#Subtopic assignment for clean energy dataset
assign_subtopic(afc_energy_df, 'QA_prep_vector', afce_subtopic_df, afce_subtopic_df['Des_prep_vector'], 'Subtopic', 'Assigned Subtopic(Preprocessed)')

In [77]:
subs_cleanenergy = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\clean energy topic and subtopics (2).xlsx", 
                                 sheet_name=1)
subs_cleanenergy

Unnamed: 0,Solar Photovoltaic Systems,Wind Turbines,Hydropower Plants,Geothermal Energy,Biomass Energy,Tidal and Wave Energy,Energy Storage Technologies,Smart Grid Technologies,Energy-Efficient Lighting,Energy for sustainable Transportation,...,Microgrids,Energy Management Systems,Clean Energy Integration,Community Solar Projects,Urban planning clean energy,Climate Resilience,Wind Farms,Clean Energy skills,Renewable Energy Certification,Carbon Offset Programs
0,Solar Photovoltaic Technologies,Wind Turbine Technologies,Hydropower Plant Types,Geothermal Resource Exploration,Biomass Feedstocks,Tidal Energy Technologies,Battery Energy Storage Systems (BESS),Advanced Metering Infrastructure (AMI),Light Emitting Diodes (LEDs),Electric Vehicles (EVs),...,Decentralized Energy Distribution,Demand Response Systems,Grid Integration of Renewable Energy,Community Solar Models,Sustainable Urban Design,Resilience Planning for Energy Infrastructure,Onshore Wind Farm Development,Renewable Energy Technologies Training,Renewable Energy Certificate (REC) Programs,Voluntary Carbon Offset Programs
1,Grid-TiedSolar Systems,Onshore Wind Farms,Run-of-River Hydropower,Geothermal Power Plants,Bioenergy Conversion Technologies,Tidal Stream Generators,Lithium-Ion Battery Technology,Smart Grid Communication Technologies,Compact Fluorescent Lamps (CFLs),Hybrid Electric Vehicles (HEVs),...,Off-Grid Microgrids,Smart Grid Integration,Hybrid Renewable Energy Systems,Shared Solar Programs,Energy-Efficient Building Codes,Climate-Resilient Energy Systems,Offshore Wind Farm Projects,Energy Efficiency Certification Programs,Green Power Certification,Compliance Carbon Offset Markets
2,Off-Grid Solar Systems,Offshore Wind Farms,Reservoir Hydropower,Enhanced Geothermal Systems (EGS),Biomass Power Plants,Tidal Range Technologies,Flow Battery Systems,Demand Response Systems,Energy-Efficient Incandescent Bulbs,Plug-In Hybrid Electric Vehicles (PHEVs),...,Renewable Energy Integration in Microgrids,Home Energy Management Systems,Energy Storage Integration,Solar Gardens and Collective Ownership,Integration of Renewable Energy in Urban Areas,Disaster Preparedness for Energy Facilities,Wind Turbine Technology Advances,Grid Management and Smart Grid Training,Carbon Offset Certification,Carbon Neutral Certification Programs
3,Solar Panel Manufacturing Processes,Wind Turbine Design and Efficiency,Pumped Storage Hydropower,Direct Use of Geothermal Energy,Biogas Production from Biomass,Tidal Barrages,Pumped Hydro Storage,Smart Grid Cybersecurity,Halogen Incandescent Lamps,Battery Electric Vehicles (BEVs),...,Energy Storage Solutions for Microgrids,Industrial Energy Management,Integration of Distributed Energy Resources,Low-Income Community Solar Initiatives,Green Building Certifications,Microgrid Resilience Strategies,Wind Farm Siting and Planning,Sustainable Building Design Skills,Sustainability Standards for Renewable Energy,Carbon Credit Trading Platforms
4,Solar Inverter Technologies,Wind Turbine Components,Hydropower Plant Design,Binary Cycle Geothermal Power Plants,Biomass Pellet Production,Wave Energy Converters,Compressed Air Energy Storage (CAES),Grid Automation and Control,Lighting Controls and Sensors,Hydrogen Fuel Cell Vehicles,...,Smart Grid Technologies for Microgrids,Building Energy Management,Smart Grids and Clean Energy,Virtual Net Metering for Community Solar,Smart Cities and Clean Energy,Renewable Energy in Disaster Recovery,Wind Resource Assessment,Energy Auditing and Management,Renewable Energy Labeling Programs,Renewable Energy Certificates (RECs) as Offsets
5,Photovoltaic Cell Efficiency,Wind Turbine Blade Technology,Turbine Technologies,Flash Steam Geothermal Power Plants,Anaerobic Digestion,Point Absorbers,Flywheel Energy Storage,Distributed Energy Resources Integration,Smart Lighting Systems,Alternative Fuels for Transportation,...,Microgrid Control and Management Systems,Microgrid Energy Management,Microgrid Integration,Community Solar Policy and Regulations,Electrification of Urban Transportation,Resilient Grid Design and Operation,Wind Farm Grid Integration,Electric Vehicle Infrastructure Training,Third-Party Verification for Clean Energy,Afforestation and Reforestation Projects
6,Solar Battery Storage Solutions,Wind Turbine Gearboxes,Hydropower Efficiency,Geothermal Heat Pumps,Biomass Gasification,Oscillating Water Columns,Thermal Energy Storage,Microgrid Technologies,Daylight Harvesting,Biofuels for Sustainable Mobility,...,Resilience and Reliability in Microgrids,Cloud-Based Energy Management,Demand Response and Clean Energy,Community Solar Financing Models,Green Infrastructure in Urban Planning,Coastal Infrastructure Resilience,Community Wind Projects,Energy Storage System Operations,Environmental Product Declarations (EPD),Methane Capture and Utilization for Offsets
7,Solar Energy Policy and Regulations,Wind Turbine Control Systems,Environmental Impact of Hydropower,Geothermal Reservoir Engineering,Biomass Combustion,Attenuators,Molten Salt Energy Storage,Electric Vehicle (EV) Integration with Smart Grid,Tunable White Lighting,Sustainable Urban Mobility,...,Community-Based Microgrid Projects,Energy Efficiency Monitoring,Virtual Power Plants,Community Solar in Urban Environments,Net-Zero Energy Communities,Climate-Resilient Building Codes,Hybrid Wind and Solar Farms,Hydrogen Technology Training,ISO Standards for Renewable Energy,Blue Carbon Offsetting (Coastal Ecosystems)
8,Net Metering and Feed-in Tariffs,Wind Turbine Maintenance,Fish-friendly Hydropower Technologies,Geothermal Fluid Chemistry,Cofiring Biomass with Coal,Overtopping Devices,Supercapacitors,Energy Storage Integration with Smart Grid,Human-Centric Lighting,Charging Infrastructure for EVs,...,Islanded Microgrids,Automated Energy Control Systems,Integration of Electric Vehicles,Community Solar in Rural Areas,Urban Heat Island Mitigation,Heatwave Resilience in Energy Systems,Repowering and Upgrading Wind Farms,Solar Photovoltaic Installation Skills,Renewable Portfolio Standards (RPS),Carbon Offset Projects in Developing Countries
9,Solar Photovoltaic Integration in Smart Grids,Wind Turbine Monitoring and Diagnostics,Sediment Management in Hydropower,Geothermal Well Drilling,Biomass for Heat and CHP,Tidal and Wave Energy Resource Assessment,Advanced Materials for Energy Storage,Smart Sensors and Monitoring,Energy Star and Labeling Programs,Vehicle-to-Grid (V2G) Integration,...,Remote Area Power Systems,Peak Load Management,Power-to-Gas Technologies,Benefits of Community Solar Projects,District Energy Systems in Urban Areas,Flood-Resilient Energy Infrastructure,Advances in Wind Turbine Materials,Wind Turbine Maintenance and Repair Skills,Leadership in Energy and Environmental Design ...,Carbon Capture and Storage (CCS) for Offsets


In [78]:
SPS = subs_cleanenergy['Solar Photovoltaic Systems']
SPS.dropna(inplace=True)

In [79]:
subs_cleanenergy.columns

Index(['Solar Photovoltaic Systems', 'Wind Turbines', 'Hydropower Plants',
       'Geothermal Energy', 'Biomass Energy', 'Tidal and Wave Energy',
       'Energy Storage Technologies', 'Smart Grid Technologies',
       'Energy-Efficient Lighting', 'Energy for sustainable Transportation',
       'Energy Electric Vehicles', 'Green Buildings',
       'Energy-efficient Appliances', 'Carbon Capture and Storage',
       'Green Hydrogen Production', 'Sustainable Agriculture Energy',
       'Circular Economy Energy', 'Eco-Friendly Construction Materials',
       'Ocean Thermal Energy ', 'Distributed Energy Resources', 'Microgrids',
       'Energy Management Systems', 'Clean Energy Integration',
       'Community Solar Projects', 'Urban planning clean energy ',
       'Climate Resilience', ' Wind Farms', 'Clean Energy skills',
       'Renewable Energy Certification', 'Carbon Offset Programs'],
      dtype='object')

In [80]:
SPS = SPS.values
WT = subs_cleanenergy['Wind Turbines'].values
HP = subs_cleanenergy['Hydropower Plants'].values
GE = subs_cleanenergy['Geothermal Energy'].values
BE = subs_cleanenergy['Biomass Energy'].values
TWE = subs_cleanenergy['Tidal and Wave Energy'].values
EST = subs_cleanenergy['Energy Storage Technologies'].values
SGT = subs_cleanenergy['Smart Grid Technologies'].values
EEL = subs_cleanenergy['Energy-Efficient Lighting'].values
EnST = subs_cleanenergy['Energy for sustainable Transportation'].values
EEV = subs_cleanenergy['Energy Electric Vehicles'].values
GB = subs_cleanenergy['Green Buildings'].values
EEA = subs_cleanenergy['Energy-efficient Appliances'].values
CCS = subs_cleanenergy['Carbon Capture and Storage'].values
GHP = subs_cleanenergy['Green Hydrogen Production'].values
SAE = subs_cleanenergy['Sustainable Agriculture Energy'].values
CEE = subs_cleanenergy['Circular Economy Energy'].values
EFC = subs_cleanenergy['Eco-Friendly Construction Materials'].values
OTE = subs_cleanenergy['Ocean Thermal Energy '].values
DER = subs_cleanenergy['Distributed Energy Resources'].values
MGD = subs_cleanenergy['Microgrids'].values
EMS = subs_cleanenergy['Energy Management Systems'].values
CEI = subs_cleanenergy['Clean Energy Integration'].values
CSP = subs_cleanenergy['Community Solar Projects'].values
UPCE = subs_cleanenergy['Urban planning clean energy '].values
CR = subs_cleanenergy['Climate Resilience'].values
WF = subs_cleanenergy[' Wind Farms'].values
CES = subs_cleanenergy['Clean Energy skills'].values
REC = subs_cleanenergy['Renewable Energy Certification'].values
COP = subs_cleanenergy['Carbon Offset Programs'].values

In [81]:
missing_values = ['nan']
SPS_cleaned = SPS[~np.isin(SPS, missing_values)]

In [84]:
SPS_cleaned

array(['Solar Photovoltaic Technologies', 'Grid-TiedSolar Systems',
       'Off-Grid Solar Systems', 'Solar Panel Manufacturing Processes',
       'Solar Inverter Technologies', 'Photovoltaic Cell Efficiency',
       'Solar Battery Storage Solutions',
       'Solar Energy Policy and Regulations',
       'Net Metering and Feed-in Tariffs',
       'Solar Photovoltaic Integration in Smart Grids',
       'Solar Photovoltaic Monitoring Systems',
       'Advancements in Solar Panel Materials',
       'Community Solar Initiatives',
       'Solar Photovoltaic for Residential Applications',
       'Solar Photovoltaic for Commercial Applications',
       'Solar Photovoltaic for Industrial Applications',
       'Solar Photovoltaic in Agriculture',
       'Economic Impact of Solar Photovoltaic Systems',
       'Life Cycle Assessment of Solar Energy Systems'], dtype=object)

In [85]:
def topic_allocation_clngy(sub_topic):
    if pd.isnull(sub_topic):
        return "NA"
    if any(keyword in sub_topic for keyword in SPS):
        return "Solar Photovoltaic Systems"
    elif any(keyword in sub_topic for keyword in WT):
        return "Wind Turbines"
    elif any(keyword in sub_topic for keyword in HP):
        return "Hydropower Plants"
    elif any(keyword in sub_topic for keyword in GE):
        return "Geothermal Energy"
    elif any(keyword in sub_topic for keyword in BE):
        return "Biomass Energy"
    elif any(keyword in sub_topic for keyword in TWE):
        return "Tidal and Wave Energy"
    elif any(keyword in sub_topic for keyword in EST):
        return "Energy Storage Technologies"
    elif any(keyword in sub_topic for keyword in SGT):
        return "Smart Grid Technologies"
    elif any(keyword in sub_topic for keyword in EEL):
        return "Energy-Efficient Lighting"
    elif any(keyword in sub_topic for keyword in EnST):
        return "Energy for sustainable Transportation"
    elif any(keyword in sub_topic for keyword in EEV):
        return "Energy Electric Vehicles"
    elif any(keyword in sub_topic for keyword in GB):
        return "Green Buildings"
    elif any(keyword in sub_topic for keyword in EEA):
        return "Energy-efficient Appliances"
    elif any(keyword in sub_topic for keyword in CCS):
        return "Carbon Capture and Storage"
    elif any(keyword in sub_topic for keyword in GHP):
        return "Green Hydrogen Production"
    elif any(keyword in sub_topic for keyword in SAE):
        return "Sustainable Agriculture Energy"
    elif any(keyword in sub_topic for keyword in CEE):
        return "Circular Economy Energy"
    elif any(keyword in sub_topic for keyword in EFC):
        return "Eco-Friendly Construction Materials"
    elif any(keyword in sub_topic for keyword in OTE):
        return "Ocean Thermal Energy"
    elif any(keyword in sub_topic for keyword in DER):
        return "Distributed Energy Resources"
    elif any(keyword in sub_topic for keyword in MGD):
        return "Microgrids"
    elif any(keyword in sub_topic for keyword in EMS):
        return "Energy Management Systems"
    elif any(keyword in sub_topic for keyword in CEI):
        return "Clean Energy Integration"
    elif any(keyword in sub_topic for keyword in CSP):
        return "Community Solar Projects"
    elif any(keyword in sub_topic for keyword in UPCE):
        return "Urban planning clean energy"
    elif any(keyword in sub_topic for keyword in CR):
        return "Climate Resilience"
    elif any(keyword in sub_topic for keyword in WF):
        return "Wind Farms"
    elif any(keyword in sub_topic for keyword in CES):
        return "Clean Energy skills"
    elif any(keyword in sub_topic for keyword in REC):
        return "Renewable Energy Certification"
    elif any(keyword in sub_topic for keyword in COP):
        return "Carbon Offset Programs"
    else:
        return "Other"


In [86]:
afc_energy_df['Topic'] = afc_energy_df['Assigned Subtopic(Preprocessed)'].apply(topic_allocation_clngy)

In [87]:
afc_energy_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,Assigned Subtopic,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed),Topic
0,Affordable and Clean Energy,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Science and Tech,,,,What is the main advantage of liquid air energ...,"[-0.004522498, -0.037984867, 0.036875945, 0.04...",Compressed Air Energy Storage (CAES),main advantage liquid air energy storage lies ...,"[0.0001286425, -0.0215348, 0.03705814, 0.03461...",Pumped Storage Hydropower,Hydropower Plants
1,Affordable and Clean Energy,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Management,,,,How can the efficiency of LIES be improved? Th...,"[-0.046298273, -0.0027031552, 0.05756366, 0.00...",,efficiency lies improved efficiency lies impro...,"[-0.05188921, 0.008030583, 0.05519354, 0.01358...",Geothermal Power Plant Efficiency,Geothermal Energy
2,Affordable and Clean Energy,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Management,,,,How can the Round Trip Efficiency (RTE) be imp...,"[-0.055615943, -0.003967587, 0.057388723, 0.01...",Geothermal Power Plant Efficiency,round trip efficiency rte improved less lnglae...,"[-0.054776583, -0.004049266, 0.050917335, 0.01...",Geothermal Power Plant Efficiency,Geothermal Energy
3,Affordable and Clean Energy,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Analysis,,,,How does the Solar-LAES-SE system with decoupl...,"[-0.06440674, 0.0287099, 0.071825825, 0.034501...",Solar Inverter Technologies,solarlaesse system decoupled charging discharg...,"[-0.059689205, 0.033795923, 0.055474304, 0.027...",Molten Salt Energy Storage,Energy Storage Technologies
4,Affordable and Clean Energy,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Strategy,,,,Why is implementing renewable energy-based ind...,"[-0.039428297, 0.0035552864, 0.008631619, -0.0...",Industrial Energy Management,implementing renewable energybased industrial ...,"[-0.04669596, 0.0006737985, 0.03521731, -0.023...",Solar Photovoltaic for Industrial Applications,Solar Photovoltaic Systems
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1660,Affordable and Clean Energy,How did solar irradiance in Spain compare to t...,Solar irradiance in Spain was 20% to 30% above...,Analysis,,,,How did solar irradiance in Spain compare to t...,"[-0.023560774, -0.0006682332, 0.030136807, 0.0...",,solar irradiance spain compare rest europe apr...,"[-0.03631249, 0.0058690365, 0.034752488, 0.027...",,
1661,Affordable and Clean Energy,What weather conditions in Europe were respons...,The negative phase of the North Atlantic Oscil...,Analysis,,,,What weather conditions in Europe were respons...,"[-0.05348125, 0.013398707, 0.0406969, -0.02428...",,weather conditions europe responsible lowpress...,"[-0.07678591, -0.0010808985, 0.036194973, -0.0...",,
1662,Affordable and Clean Energy,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Science and Tech,,,,Based on the model constructed using data from...,"[-0.034322396, 0.03786056, 0.03920884, 0.00370...",Social and Economic Benefits of Wind Farms,based model constructed using data irish grid ...,"[-0.044054758, 0.023186581, 0.034040935, -0.00...",Lifecycle Analysis of Electric Vehicles,Energy Electric Vehicles
1663,Affordable and Clean Energy,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Analysis,,,,Despite the high share of wind energy on the I...,"[-0.01959131, 0.008530454, -0.0013339912, -0.0...",Small-Scale Wind Turbines,despite high share wind energy irish grid achi...,"[-0.039006848, 0.020299856, 0.0010144438, 0.00...",Integration of Offshore Wind Farms,Wind Turbines


In [89]:
afc_energy_df_striped = afc_energy_df[afc_energy_df['Topic'] != 'NA']

In [91]:
afc_energy_df_striped = afc_energy_df_striped[['Question', 'Answer', 'Macrotopic ', 'Topic',
                                                   'Assigned Subtopic(Preprocessed)', 'Source Url']]

In [92]:
afc_energy_df_striped.rename(columns={'Assigned Subtopic(Preprocessed)': 'Subtopic'}, inplace=True)
afc_energy_df_striped = afc_energy_df_striped.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  afc_energy_df_striped.rename(columns={'Assigned Subtopic(Preprocessed)': 'Subtopic'}, inplace=True)


In [93]:
afc_energy_df_striped

Unnamed: 0,Question,Answer,Macrotopic,Topic,Subtopic,Source Url
0,What is the main advantage of liquid air energ...,The main advantage of LIES is that it is geogr...,Affordable and Clean Energy,Hydropower Plants,Pumped Storage Hydropower,
1,How can the efficiency of LIES be improved?,The efficiency of LIES can be improved by util...,Affordable and Clean Energy,Geothermal Energy,Geothermal Power Plant Efficiency,
2,How can the Round Trip Efficiency (RTE) be imp...,The Round Trip Efficiency (RTE) can be improve...,Affordable and Clean Energy,Geothermal Energy,Geothermal Power Plant Efficiency,
3,How does the Solar-LAES-SE system with decoupl...,The Solar-LAES-SE system with decoupled chargi...,Affordable and Clean Energy,Energy Storage Technologies,Molten Salt Energy Storage,
4,Why is implementing renewable energy-based ind...,Implementing renewable energy-based industrial...,Affordable and Clean Energy,Solar Photovoltaic Systems,Solar Photovoltaic for Industrial Applications,
...,...,...,...,...,...,...
1257,What is the opinion of energy experts regardin...,Energy experts strongly criticize the plans to...,Affordable and Clean Energy,Solar Photovoltaic Systems,Solar Photovoltaic in Agriculture,
1258,What are some concerns raised by energy expert...,Energy experts have raised concerns about the ...,Affordable and Clean Energy,Wind Turbines,Wind Energy Policy and Regulations,
1259,Based on the model constructed using data from...,The model showed that wind energy reduced life...,Affordable and Clean Energy,Energy Electric Vehicles,Lifecycle Analysis of Electric Vehicles,
1260,Despite the high share of wind energy on the I...,"Even with up to 70% wind energy on the grid, t...",Affordable and Clean Energy,Wind Turbines,Integration of Offshore Wind Farms,


In [94]:
afc_energy_df_striped.to_csv('Clean Energy for SUPERCLEANED dataset 23-3-2024.csv')

In [173]:
afc_energy_df.to_csv('Unstriped Clean Energy for SUPERCLEANED dataset 23-3-2024.csv')

# CLIMATE ACTION

In [96]:
climate_action_df = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Supercleaned demo dataset ready for Topic mod 21 03.xlsx", 
                       sheet_name=3)
climate_action_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos..."
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos..."
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos..."
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos..."
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos..."
...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W..."
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W..."
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W..."
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W..."


In [97]:
climate_action_df['QA'] = climate_action_df['Question'] + ' ' + climate_action_df['Answer']
climate_action_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",Why is sustainability risk gaining importance ...
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...","Why is environmental risk, particularly climat..."
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How does climate change impact financial perfo...
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How do EU sustainability policies address clim...
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How are climate policy goals related to sustai...
...,...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How does the integration of CTA with vision as...
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",In what way can storytelling be used to mobili...
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",What are the key considerations for responsibl...
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How do Stile et al. (2014) advocate for the pr...


In [101]:
# Climate change subtopic
clmtaction_subtopic = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Climate Change macro-topic, topic, sub-topics .xlsx", 
                                 sheet_name=2, skiprows=[1])
clmtaction_subtopic

Unnamed: 0,topic,Subtopic,Description,Keywords
0,Sea level rise,Coastal Erosion,Coastal erosion involves the wearing away of l...,"coastal, erosion, protection, sea walls"
1,Sea level rise,Impact on Low-Lying Islands,Low-lying islands face existential threats fro...,"low-lying islands, sea level rise, adaptation"
2,Sea level rise,Threats to Coastal Infrastructure,Rising sea levels pose risks to coastal infras...,"coastal infrastructure, sea level rise, flood-..."
3,Sea level rise,Saltwater Intrusion into Freshwater Aquifers,Saltwater intrusion contaminates freshwater re...,"saltwater intrusion, freshwater, aquifers"
4,Sea level rise,Loss of Coastal Wetlands,Coastal wetlands are threatened by sea level r...,"coastal wetlands, biodiversity, restoration"
...,...,...,...,...
491,International climate agreements,Bilateral Climate Agreements,Agreements between two countries to work toget...,"bilateral agreements, climate action, collabor..."
492,International climate agreements,Role of Non-Governmental Organizations (NGOs),Highlighting the contributions of NGOs in shap...,"NGOs, climate action, advocacy"
493,International climate agreements,United Nations Climate Change Conferences (COPs),Annual conferences where countries negotiate a...,"COPs, climate change, conferences"
494,International climate agreements,Indigenous Rights and Inclusion in Global Agre...,"Recognizing and incorporating the rights, know...","indigenous rights, climate agreements, inclusion"


In [100]:
# text embedding for climate action corpus
text_embed('QA', climate_action_df, 'QA_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",Why is sustainability risk gaining importance ...,"[-0.015575544, -0.017865686, 0.012622053, -0.0..."
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...","Why is environmental risk, particularly climat...","[0.012294093, -0.027403206, 0.043451298, 0.006..."
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How does climate change impact financial perfo...,"[-0.015295985, -0.010350421, 0.06548612, 0.036..."
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How do EU sustainability policies address clim...,"[-0.023361845, -0.013595292, 0.017625155, -0.0..."
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How are climate policy goals related to sustai...,"[-0.0035610863, -0.0016003839, 0.042024367, -0..."
...,...,...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How does the integration of CTA with vision as...,"[-0.02345577, 0.0058976845, 0.04266114, -0.027..."
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",In what way can storytelling be used to mobili...,"[0.040761076, -0.03606351, 0.038470004, -0.008..."
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",What are the key considerations for responsibl...,"[-0.017144602, -0.027038224, 0.061683625, -0.0..."
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How do Stile et al. (2014) advocate for the pr...,"[-0.03248833, 0.020769533, 0.023415921, -0.038..."


In [102]:
# subtopic embedding for climate change
text_embed('Description', clmtaction_subtopic, 'Des_vector')
clmtaction_subtopic

Unnamed: 0,topic,Subtopic,Description,Keywords,Des_vector
0,Sea level rise,Coastal Erosion,Coastal erosion involves the wearing away of l...,"coastal, erosion, protection, sea walls","[-0.031825434, -0.012220559, 0.037745282, -0.0..."
1,Sea level rise,Impact on Low-Lying Islands,Low-lying islands face existential threats fro...,"low-lying islands, sea level rise, adaptation","[-0.055055037, 0.0029631474, 0.008831218, 0.00..."
2,Sea level rise,Threats to Coastal Infrastructure,Rising sea levels pose risks to coastal infras...,"coastal infrastructure, sea level rise, flood-...","[-0.03478838, -0.03702223, 0.04551968, -0.0298..."
3,Sea level rise,Saltwater Intrusion into Freshwater Aquifers,Saltwater intrusion contaminates freshwater re...,"saltwater intrusion, freshwater, aquifers","[-0.033366896, -0.043829467, 0.051283106, -0.0..."
4,Sea level rise,Loss of Coastal Wetlands,Coastal wetlands are threatened by sea level r...,"coastal wetlands, biodiversity, restoration","[-0.0031436805, -0.010280124, 0.02047494, -0.0..."
...,...,...,...,...,...
491,International climate agreements,Bilateral Climate Agreements,Agreements between two countries to work toget...,"bilateral agreements, climate action, collabor...","[-0.057403196, 0.004895146, 0.052354258, 0.004..."
492,International climate agreements,Role of Non-Governmental Organizations (NGOs),Highlighting the contributions of NGOs in shap...,"NGOs, climate action, advocacy","[-0.049948506, 0.014424108, 0.026244527, -0.01..."
493,International climate agreements,United Nations Climate Change Conferences (COPs),Annual conferences where countries negotiate a...,"COPs, climate change, conferences","[-0.05309704, -0.015367571, 0.019939508, 0.031..."
494,International climate agreements,Indigenous Rights and Inclusion in Global Agre...,"Recognizing and incorporating the rights, know...","indigenous rights, climate agreements, inclusion","[-0.015862292, 0.0067283413, 0.049751773, 0.01..."


In [111]:
# Subtopic assignment for climate change dataset
assign_subtopic(climate_action_df, 'QA_vector', clmtaction_subtopic, clmtaction_subtopic['Des_vector'], 'Subtopic', 'Assigned Subtopic')

In [112]:
climate_action_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed),Assigned Subtopic
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",Why is sustainability risk gaining importance ...,"[-0.015575544, -0.017865686, 0.012622053, -0.0...",sustainability risk gaining importance busines...,"[-0.021073606, -0.039178837, 0.015091247, -0.0...",Financial Viability of CCS,
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...","Why is environmental risk, particularly climat...","[0.012294093, -0.027403206, 0.043451298, 0.006...",environmental risk particularly climate change...,"[0.0008381564, -0.03459547, 0.05423817, 0.0094...",Intense Rainfall and Flash Flooding,Ice Cap and Glacier Mass Loss
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How does climate change impact financial perfo...,"[-0.015295985, -0.010350421, 0.06548612, 0.036...",climate change impact financial performance ac...,"[-0.017736359, -0.007898038, 0.0696104, 0.0370...",Changes in Windstorm Patterns,Climate-Resilient Livelihoods
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How do EU sustainability policies address clim...,"[-0.023361845, -0.013595292, 0.017625155, -0.0...",eu sustainability policies address climate cha...,"[-0.029540634, -0.00070302305, 0.020287389, -0...",Green Finance for GHG Reduction Projects,Promotion of Renewable Energy Policies
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How are climate policy goals related to sustai...,"[-0.0035610863, -0.0016003839, 0.042024367, -0...",climate policy goals related sustainable devel...,"[-0.023365559, 0.0012606684, 0.065723896, -0.0...",Global Greenhouse Gas Emission Targets,Climate Agreements and Sustainable Development...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How does the integration of CTA with vision as...,"[-0.02345577, 0.0058976845, 0.04266114, -0.027...",integration cta vision assessment help explore...,"[-0.02349394, 0.008930604, 0.038689557, -0.022...",,Role of Technology in Biodiversity Monitoring
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",In what way can storytelling be used to mobili...,"[0.040761076, -0.03606351, 0.038470004, -0.008...",way storytelling used mobilize situated local ...,"[0.030992633, -0.030203486, 0.045347907, -0.00...",Localized Climate Action Initiatives,Localized Climate Action Initiatives
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",What are the key considerations for responsibl...,"[-0.017144602, -0.027038224, 0.061683625, -0.0...",key considerations responsible governance inno...,"[-0.03359087, -0.026973063, 0.058436006, -0.05...",,
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How do Stile et al. (2014) advocate for the pr...,"[-0.03248833, 0.020769533, 0.023415921, -0.038...",stile et al advocate promotion public engageme...,"[-0.024278762, 0.028534295, 0.0331938, -0.0421...",Role of Citizen Science in Biodiversity Monito...,Role of Citizen Science in Biodiversity Monito...


In [105]:
climate_action_df['QA_prep'] = climate_action_df['QA'].apply(preprocess_text)

In [106]:
clmtaction_subtopic['Des_prep'] = clmtaction_subtopic['Description'].apply(preprocess_text)

In [107]:
# text embedding for preprocessed climate change corpus
text_embed('QA_prep', climate_action_df, 'QA_prep_vector')

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,QA_prep,QA_prep_vector
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",Why is sustainability risk gaining importance ...,"[-0.015575544, -0.017865686, 0.012622053, -0.0...",sustainability risk gaining importance busines...,"[-0.021073606, -0.039178837, 0.015091247, -0.0..."
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...","Why is environmental risk, particularly climat...","[0.012294093, -0.027403206, 0.043451298, 0.006...",environmental risk particularly climate change...,"[0.0008381564, -0.03459547, 0.05423817, 0.0094..."
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How does climate change impact financial perfo...,"[-0.015295985, -0.010350421, 0.06548612, 0.036...",climate change impact financial performance ac...,"[-0.017736359, -0.007898038, 0.0696104, 0.0370..."
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How do EU sustainability policies address clim...,"[-0.023361845, -0.013595292, 0.017625155, -0.0...",eu sustainability policies address climate cha...,"[-0.029540634, -0.00070302305, 0.020287389, -0..."
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How are climate policy goals related to sustai...,"[-0.0035610863, -0.0016003839, 0.042024367, -0...",climate policy goals related sustainable devel...,"[-0.023365559, 0.0012606684, 0.065723896, -0.0..."
...,...,...,...,...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How does the integration of CTA with vision as...,"[-0.02345577, 0.0058976845, 0.04266114, -0.027...",integration cta vision assessment help explore...,"[-0.02349394, 0.008930604, 0.038689557, -0.022..."
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",In what way can storytelling be used to mobili...,"[0.040761076, -0.03606351, 0.038470004, -0.008...",way storytelling used mobilize situated local ...,"[0.030992633, -0.030203486, 0.045347907, -0.00..."
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",What are the key considerations for responsibl...,"[-0.017144602, -0.027038224, 0.061683625, -0.0...",key considerations responsible governance inno...,"[-0.03359087, -0.026973063, 0.058436006, -0.05..."
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How do Stile et al. (2014) advocate for the pr...,"[-0.03248833, 0.020769533, 0.023415921, -0.038...",stile et al advocate promotion public engageme...,"[-0.024278762, 0.028534295, 0.0331938, -0.0421..."


In [108]:
# subtopic embedding for preprocessed climate change description
text_embed('Des_prep', clmtaction_subtopic, 'Des_prep_vector')

Unnamed: 0,topic,Subtopic,Description,Keywords,Des_vector,Des_prep,Des_prep_vector
0,Sea level rise,Coastal Erosion,Coastal erosion involves the wearing away of l...,"coastal, erosion, protection, sea walls","[-0.031825434, -0.012220559, 0.037745282, -0.0...",coastal erosion involves wearing away land rem...,"[-0.02868289, -0.0055619627, 0.026282977, -0.0..."
1,Sea level rise,Impact on Low-Lying Islands,Low-lying islands face existential threats fro...,"low-lying islands, sea level rise, adaptation","[-0.055055037, 0.0029631474, 0.008831218, 0.00...",lowlying islands face existential threats risi...,"[-0.050886992, -0.009998705, 0.0025397737, -0...."
2,Sea level rise,Threats to Coastal Infrastructure,Rising sea levels pose risks to coastal infras...,"coastal infrastructure, sea level rise, flood-...","[-0.03478838, -0.03702223, 0.04551968, -0.0298...",rising sea levels pose risks coastal infrastru...,"[-0.028306216, -0.037292987, 0.04857379, -0.03..."
3,Sea level rise,Saltwater Intrusion into Freshwater Aquifers,Saltwater intrusion contaminates freshwater re...,"saltwater intrusion, freshwater, aquifers","[-0.033366896, -0.043829467, 0.051283106, -0.0...",saltwater intrusion contaminates freshwater re...,"[-0.046043143, -0.03874169, 0.042635817, -0.00..."
4,Sea level rise,Loss of Coastal Wetlands,Coastal wetlands are threatened by sea level r...,"coastal wetlands, biodiversity, restoration","[-0.0031436805, -0.010280124, 0.02047494, -0.0...",coastal wetlands threatened sea level rise lea...,"[-0.012325102, -0.012568952, 0.025065618, -0.0..."
...,...,...,...,...,...,...,...
491,International climate agreements,Bilateral Climate Agreements,Agreements between two countries to work toget...,"bilateral agreements, climate action, collabor...","[-0.057403196, 0.004895146, 0.052354258, 0.004...",agreements two countries work together specifi...,"[-0.056429412, 0.009538388, 0.057691716, 0.002..."
492,International climate agreements,Role of Non-Governmental Organizations (NGOs),Highlighting the contributions of NGOs in shap...,"NGOs, climate action, advocacy","[-0.049948506, 0.014424108, 0.026244527, -0.01...",highlighting contributions ngos shaping advoca...,"[-0.06789759, 0.012626651, 0.007876329, -0.026..."
493,International climate agreements,United Nations Climate Change Conferences (COPs),Annual conferences where countries negotiate a...,"COPs, climate change, conferences","[-0.05309704, -0.015367571, 0.019939508, 0.031...",annual conferences countries negotiate assess ...,"[-0.062138118, 0.0054841363, 0.023442823, 0.03..."
494,International climate agreements,Indigenous Rights and Inclusion in Global Agre...,"Recognizing and incorporating the rights, know...","indigenous rights, climate agreements, inclusion","[-0.015862292, 0.0067283413, 0.049751773, 0.01...",recognizing incorporating rights knowledge per...,"[-0.03817889, 0.01594308, 0.053786807, -0.0073..."


In [109]:
#Subtopic assignment for smart cities dataset
assign_subtopic(climate_action_df, 'QA_prep_vector', clmtaction_subtopic, clmtaction_subtopic['Des_prep_vector'], 'Subtopic', 'Assigned Subtopic(Preprocessed)')

In [110]:
climate_action_df

Unnamed: 0,Macrotopic,Question,Answer,Label,Source Title,Source Url,Source Author,QA,QA_vector,QA_prep,QA_prep_vector,Assigned Subtopic(Preprocessed)
0,Climate Action,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",Why is sustainability risk gaining importance ...,"[-0.015575544, -0.017865686, 0.012622053, -0.0...",sustainability risk gaining importance busines...,"[-0.021073606, -0.039178837, 0.015091247, -0.0...",Financial Viability of CCS
1,Climate Action,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Ethics and regulation,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...","Why is environmental risk, particularly climat...","[0.012294093, -0.027403206, 0.043451298, 0.006...",environmental risk particularly climate change...,"[0.0008381564, -0.03459547, 0.05423817, 0.0094...",Intense Rainfall and Flash Flooding
2,Climate Action,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Analysis,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How does climate change impact financial perfo...,"[-0.015295985, -0.010350421, 0.06548612, 0.036...",climate change impact financial performance ac...,"[-0.017736359, -0.007898038, 0.0696104, 0.0370...",Changes in Windstorm Patterns
3,Climate Action,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Management,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How do EU sustainability policies address clim...,"[-0.023361845, -0.013595292, 0.017625155, -0.0...",eu sustainability policies address climate cha...,"[-0.029540634, -0.00070302305, 0.020287389, -0...",Green Finance for GHG Reduction Projects
4,Climate Action,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Strategy,PDF1_Climate_policy_development_and_implementa...,https://www.sciencedirect.com/science/article/...,"Krzysztof Plaza a, Magdalena Solo b, Marta Pos...",How are climate policy goals related to sustai...,"[-0.0035610863, -0.0016003839, 0.042024367, -0...",climate policy goals related sustainable devel...,"[-0.023365559, 0.0012606684, 0.065723896, -0.0...",Global Greenhouse Gas Emission Targets
...,...,...,...,...,...,...,...,...,...,...,...,...
769,Climate Action,How does the integration of CTA with vision as...,The integration of CTA (Critical Technology As...,Analysis,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How does the integration of CTA with vision as...,"[-0.02345577, 0.0058976845, 0.04266114, -0.027...",integration cta vision assessment help explore...,"[-0.02349394, 0.008930604, 0.038689557, -0.022...",
770,Climate Action,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Management,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",In what way can storytelling be used to mobili...,"[0.040761076, -0.03606351, 0.038470004, -0.008...",way storytelling used mobilize situated local ...,"[0.030992633, -0.030203486, 0.045347907, -0.00...",Localized Climate Action Initiatives
771,Climate Action,What are the key considerations for responsibl...,Steering et al. (2018) emphasize the importanc...,Factual,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",What are the key considerations for responsibl...,"[-0.017144602, -0.027038224, 0.061683625, -0.0...",key considerations responsible governance inno...,"[-0.03359087, -0.026973063, 0.058436006, -0.05...",
772,Climate Action,How do Stile et al. (2014) advocate for the pr...,Stile et al. (2014) argue that promoting publi...,Ethics and regulation,Guiding visions of corporate smart city innova...,https://www.sciencedirect.com/science/article/...,"Marjolein G. van der Meir, Make Fragile, J.E.W...",How do Stile et al. (2014) advocate for the pr...,"[-0.03248833, 0.020769533, 0.023415921, -0.038...",stile et al advocate promotion public engageme...,"[-0.024278762, 0.028534295, 0.0331938, -0.0421...",Role of Citizen Science in Biodiversity Monito...


In [160]:
subs_climatechg = pd.read_excel(r"C:\Users\RedHat\Downloads\Tiiqu\Climate Change macro-topic, topic, sub-topics .xlsx", 
                                 sheet_name=0)
subs_climatechg

Unnamed: 0,Global Warming,Sea Level Rise,Carbon Footprint Reduction,Sustainable Agriculture Practices,Deforestation and Reforestation,Melting Ice Caps and Glaciers,Extreme Weather Events,Biodiversity Conservation,Ocean Acidification,Climate Resilient Infrastructure,Greenhouse Gas Emission Reduction,Climate Change Adaptation Strategies,Clean Energy Transition,Sustainable Transportation,circular Economy Initiatives,Climate Justice and Equity,carbon capture and Storage,International Climate agreement
0,Greenhouse Gas Emissions,Coastal Erosion,Renewable Energy Adoption,Precision Agriculture Technologies,Tropical Rainforest Conservation,Arctic Sea Ice Retreat,Intensification of Hurricanes,Habitat Restoration Projects,Impact on Coral Reefs,Resilient urban planning,Renewable Energy Transition,Resilient Water Management,Renewable Energy Integration,electrification of Public Transportation,Adoption of Sustainable Product Design,Environmental Justice Advocacy,Direct Air Capture Technologies,Paris Agreement Implementation
1,Rising Average Global Temperatures,Impact on Low-Lying Islands,Energy-Efficient Transportation,Organic Farming Methods,Impact of Logging on Biodiversity,Antarctic Ice Sheet Dynamics,Increased Frequency of Heatwaves,Threatened and Endangered Species Protection,Acidification Effects on Shellfish,Infrastructure Risk Assessment,Energy Efficiency Measures,Coastal Zone Protection,Transition to Solar Power,Adoption of Electric Vehicles,Closed-Loop Material Recycling,Fair Distribution of Climate Impacts,Enhanced Oil Recovery with carbon capture,Nationally Determined Contributions (NDCs)
2,Impact on Polar Ice Caps,Threats to Coastal Infrastructure,Green building Practices,Conservation Tillage Techniques,Afforestation Initiatives,Glacier Mass Balance,Changes in Precipitation Patterns,Conservation Genetics,Changes in Marine Food Webs,Flood-Resistant building s,carbon capture and Storage (CCS),Early Warning Systems for Extreme Weather,Adoption of Wind Energy,Green Urban Mobility Planning,circular packaging Solutions,Indigenous Rights in Climate Action,Geological Storage of Captured Carbon,Global Greenhouse Gas Emission Targets
3,Ocean Temperature Increase,Saltwater Intrusion into Freshwater Aquifers,circular Economy Initiatives,Drought-Resistant crop Varieties,Causes and Consequences of Deforestation,Permafrost Thawing,Coastal and Inland Flooding,Wildlife Corridor Establishment,Carbonate Ion Saturation Levels,Climate-Resilient Transportation Systems,Sustainable Transportation Solutions,Ecosystem-Based Adaptation,Hydropower Generation,Development of Bicycle Infrastructure,circular Fashion and Textiles,Access to Clean Energy for All,carbon capture Utilization and Storage (CCUS),Climate Finance Commitments
4,Extreme Weather Patterns,Loss of Coastal Wetlands,Afforestation and Reforestation,Water-Efficient Irrigation Systems,Reforestation as a Carbon Sink,Glacier Calving Events,Wildfire Intensity and Frequency,Marine Protected Areas,Acidification and Marine Microorganisms,Green Infrastructure Design,Afforestation and Reforestation Projects,Climate-Resilient Agriculture Practices,Biomass and Bioenergy Solutions,Eco-friendly Urban Design,Reusable and Recyclable Plastics,Gender Equality in Climate Policies,Carbon Sequestration in Forests,Adaptation and Resilience Goals
5,Melting Glaciers and Ice Sheets,Increased Flooding Events,Sustainable Agriculture Practices,Soil Carbon Sequestration,Sustainable Logging Practices,Iceberg Formation and Movement,Tropical Cyclone Intensity,Indigenous Knowledge in Biodiversity Conservation,Ocean Acidification Monitoring Techniques,Seawall and Coastal Protection,Methane Emission Reduction Strategies,Infrastructure Resilience Planning,Tidal and Wave Energy Projects,Low-Emission Public Fleets,Extended Producer Responsibility Programs,Climate Refugees and Displacement,Bioenergy with carbon capture and Storage (BECCS),Technology Transfer for Climate Action
6,Sea Level Rise,Effects on Coastal Agriculture,Eco-friendly Consumer Choices,Cover Cropping and crop Rotation,Forest Restoration Techniques,Impact on Sea Level Rise,Changes in Tornado Patterns,Sustainable forestry Practices,Impact on Commercial Fisheries,Stormwater Management,Green building Standards,Community-Based Adaptation,Transitioning to Clean Hydrogen,Sustainable aviation Biofuels,Product Life Extension Strategies,Fair Distribution of Climate Finance,Ocean-Based carbon capture Methods,"Monitoring, Reporting, and Verification (MRV)"
7,Heatwaves and Heat-Related Illnesses,Migration of Coastal Communities,Carbon offset Programs,Integrated Pest Management (IPM),Indigenous Practices in Forest Conservation,Changes in Glacier Surface Albedo,Expansion of Desertification,Invasive Species Management,Influence on Marine Biodiversity,Adaptive Water Supply Systems,Livestock Methane Reduction,Health Sector Adaptation,Energy Storage Technologies,Eco-friendly Transportation Policies,E-Waste Recycling and Upcycling,Inclusion of Marginalized Communities,carbon capture in Industrial Processes,Carbon Market Mechanisms
8,Ocean Acidification,Changes in Coastal Biodiversity,Energy-Efficient Appliances,Climate-Smart crop Selection,Urban Reforestation Programs,Glacier Monitoring Technologies,Impact on Monsoon Systems,Protected Area Connectivity,Acidification Effects on Plankton,Resilient Energy Grids,Low-Carbon Agriculture Practices,Indigenous Knowledge in Adaptation,Decentralized Energy Systems,Integration of Smart Transportation Systems,Eco-friendly Waste-to-Energy Technologies,Intersectionality in Climate Activism,Carbon Mineralization and Storage,Loss and Damage Mechanism
9,Shifts in Wildlife Migration Patterns,Altered Ocean Currents,Sustainable Waste Management,Sustainable Livestock Farming,Agroforestry for Sustainable Land Use,Glacier Meltwater Contributions to Oceans,Changes in Snowfall Patterns,Conservation Breeding Programs,Role of Ocean Acidification in Coral Bleaching,Climate-Resilient Housing,Waste-to-Energy Technologies,Climate-Resilient building Codes,Energy Efficiency Measures,Carpooling and Ridesharing Initiatives,circular Agriculture Practices,Community-Led Climate Adaptation,Microbial carbon capture Technologies,Capacity building for Developing Nations


In [161]:
subs_climatechg.columns

Index(['Global Warming', 'Sea Level Rise', 'Carbon Footprint Reduction',
       'Sustainable Agriculture Practices', 'Deforestation and Reforestation',
       'Melting Ice Caps and Glaciers', 'Extreme Weather Events',
       'Biodiversity Conservation', 'Ocean Acidification',
       'Climate Resilient Infrastructure', 'Greenhouse Gas Emission Reduction',
       'Climate Change Adaptation Strategies', 'Clean Energy Transition',
       'Sustainable Transportation', 'circular Economy Initiatives',
       'Climate Justice and Equity', 'carbon capture and Storage',
       'International Climate agreement '],
      dtype='object')

In [162]:
GW = subs_climatechg['Global Warming'].dropna().values
SLR = subs_climatechg['Sea Level Rise'].dropna().values
CFR = subs_climatechg['Carbon Footprint Reduction'].dropna().values
SAP = subs_climatechg['Sustainable Agriculture Practices'].dropna().values
DAR = subs_climatechg['Deforestation and Reforestation'].dropna().values
MCG = subs_climatechg['Melting Ice Caps and Glaciers'].dropna().values
EWE = subs_climatechg['Extreme Weather Events'].dropna().values
BC = subs_climatechg['Biodiversity Conservation'].dropna().values
OA = subs_climatechg['Ocean Acidification'].dropna().values
CRI = subs_climatechg['Climate Resilient Infrastructure'].dropna().values
GER = subs_climatechg['Greenhouse Gas Emission Reduction'].dropna().values
CAS = subs_climatechg['Climate Change Adaptation Strategies'].dropna().values
CET = subs_climatechg['Clean Energy Transition'].dropna().values
ST = subs_climatechg['Sustainable Transportation'].dropna().values
CEI = subs_climatechg['circular Economy Initiatives'].dropna().values
CJE = subs_climatechg['Climate Justice and Equity'].dropna().values
CCS = subs_climatechg['carbon capture and Storage'].dropna().values
ICA = subs_climatechg['International Climate agreement '].dropna().values

In [163]:
GW

array(['Greenhouse Gas Emissions', 'Rising Average Global Temperatures',
       'Impact on Polar Ice Caps', 'Ocean Temperature Increase',
       'Extreme Weather Patterns', 'Melting Glaciers and Ice Sheets',
       'Sea Level Rise', 'Heatwaves and Heat-Related Illnesses',
       'Ocean Acidification', 'Shifts in Wildlife Migration Patterns',
       'Disruption of Ecosystems',
       'Effects on Agriculture and Food Security',
       'Threats to Coral Reefs',
       'Increased Intensity of Hurricanes/Cyclones',
       'Impact on Arctic and Antarctic Wildlife',
       'Permafrost Thaw and Release of Methane',
       'Rising Sea Surface Temperatures', 'Threats to Biodiversity',
       'Changes in Ocean Circulation', 'Impact on Coastal Communities',
       'Spread of Vector-Borne Diseases', 'Acid Rain Formation',
       'Changes in Weather Extremes', 'Altered Patterns of Snowfall',
       'Risks to Low-Lying Islands', 'Ocean Current Changes',
       'Shifts in Wildlife Habitats',
       'C

In [164]:
def topic_allocation_clmtactn(sub_topic):
    if pd.isnull(sub_topic):
        return "NA"
    if any(keyword in sub_topic for keyword in GW):
        return "Global Warming"
    elif any(keyword in sub_topic for keyword in SLR):
        return "Sea Level Rise"
    elif any(keyword in sub_topic for keyword in CFR):
        return "Carbon Footprint Reduction"
    elif any(keyword in sub_topic for keyword in SAP):
        return "Sustainable Agriculture Practices"
    elif any(keyword in sub_topic for keyword in DAR):
        return "Deforestation and Reforestation"
    elif any(keyword in sub_topic for keyword in MCG):
        return "Melting Ice Caps and Glaciers"
    elif any(keyword in sub_topic for keyword in EWE):
        return "Extreme Weather Events"
    elif any(keyword in sub_topic for keyword in BC):
        return "Biodiversity Conservation"
    elif any(keyword in sub_topic for keyword in OA):
        return "Ocean Acidification"
    elif any(keyword in sub_topic for keyword in CRI):
        return "Climate Resilient Infrastructure"
    elif any(keyword in sub_topic for keyword in GER):
        return "Greenhouse Gas Emission Reduction'"
    elif any(keyword in sub_topic for keyword in CAS):
        return "Climate Change Adaptation Strategies"
    elif any(keyword in sub_topic for keyword in CET):
        return "Clean Energy Transition"
    elif any(keyword in sub_topic for keyword in ST):
        return "Sustainable Transportation"
    elif any(keyword in sub_topic for keyword in CEI):
        return "circular Economy Initiatives"
    elif any(keyword in sub_topic for keyword in CJE):
        return "Climate Justice and Equity"
    elif any(keyword in sub_topic for keyword in CCS):
        return "carbon capture and Storage"
    elif any(keyword in sub_topic for keyword in ICA):
        return "International Climate Agreements"
    else:
        return "Other"

In [165]:
climate_action_df['Topic'] = climate_action_df['Assigned Subtopic(Preprocessed)'].apply(topic_allocation_clmtactn)

In [166]:
climate_action_df_striped = climate_action_df[climate_action_df['Topic'] != 'NA']

In [167]:
climate_action_df_striped = climate_action_df_striped[['Question', 'Answer', 'Macrotopic ', 'Topic',
                                                   'Assigned Subtopic(Preprocessed)', 'Source Url']]

In [168]:
climate_action_df_striped.rename(columns={'Assigned Subtopic(Preprocessed)': 'Subtopic'}, inplace=True)
climate_action_df_striped = climate_action_df_striped.reset_index(drop=True)

In [169]:
climate_action_df_striped

Unnamed: 0,Question,Answer,Macrotopic,Topic,Subtopic,Source Url
0,Why is sustainability risk gaining importance ...,"Sustainability risk, or ESG risk, is gaining i...",Climate Action,carbon capture and Storage,Financial Viability of CCS,https://www.sciencedirect.com/science/article/...
1,"Why is environmental risk, particularly climat...","Environmental risk, especially climate change,...",Climate Action,Extreme Weather Events,Intense Rainfall and Flash Flooding,https://www.sciencedirect.com/science/article/...
2,How does climate change impact financial perfo...,Studies and reports indicate that climate chan...,Climate Action,Extreme Weather Events,Changes in Windstorm Patterns,https://www.sciencedirect.com/science/article/...
3,How do EU sustainability policies address clim...,EU sustainability policies address climate cha...,Climate Action,Greenhouse Gas Emission Reduction',Green Finance for GHG Reduction Projects,https://www.sciencedirect.com/science/article/...
4,How are climate policy goals related to sustai...,Climate policy goals are in line with sustaina...,Climate Action,International Climate Agreements,Global Greenhouse Gas Emission Targets,https://www.sciencedirect.com/science/article/...
...,...,...,...,...,...,...
543,How can scenario planning be implemented in th...,Scenario planning can be effectively implement...,Climate Action,Sustainable Transportation,Green Urban Logistics,https://www.sciencedirect.com/science/article/...
544,What is the main purpose of the digital platfo...,The main purpose of the digital platform Get T...,Climate Action,Other,Resilient Urban Planning,https://www.sciencedirect.com/science/article/...
545,How does Get Together contribute to urban soci...,Get Together contributes to urban social parti...,Climate Action,Climate Justice and Equity,Localized Climate Action Initiatives,https://www.sciencedirect.com/science/article/...
546,In what way can storytelling be used to mobili...,Storytelling can be a powerful tool in partici...,Climate Action,Climate Justice and Equity,Localized Climate Action Initiatives,https://www.sciencedirect.com/science/article/...


In [171]:
climate_action_df_striped.to_csv('Climate Change for SUPERCLEANED dataset 23-3-2024.csv')

In [172]:
climate_action_df.to_csv('Unstriped Climate Change for SUPERCLEANED dataset 23-3-2024.csv')