## DATASETS CREATED FOR INCIDENTALLY
### Important: Code has not been cleaned

In [None]:
import pandas as pd
import os
import json
import xml.etree.ElementTree as ET

In [None]:
# Load the bill table and keep one row per (congress, bill_number).
df_bills = pd.read_csv('df_bills_fv.tsv', sep='\t')
cols = ['congress', 'bill_number']
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of df_bills (chained assignment / SettingWithCopyWarning).
df_bill = df_bills[cols].copy()
# Split bill_number into its leading letters ('bill') and the digits that follow ('number').
df_bill[['bill', 'number']] = df_bill['bill_number'].str.extract(r'([a-zA-Z]+)([0-9]+)')

df_bill = df_bill.drop_duplicates()

In [None]:
def get_cosponsor(congress, bill, bill_number):
    """Return the cosponsor names recorded for one bill.

    Reads ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` when
    it exists and otherwise falls back to ``data.json`` in the same directory.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        XML source: the unique ``fullName`` texts under ``cosponsors`` in
        document order. JSON source: the ``name`` of every ``cosponsors``
        entry. ``None`` when neither file exists, when the cosponsor section
        is missing, or when any error occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if os.path.exists(xml_file_path):
            cosponsors = ET.parse(xml_file_path).getroot().find('.//cosponsors')
            if cosponsors is None:
                print('Sponsor element not found in XML:', congress, bill_number)
                return None
            name_list = []
            for element in cosponsors.findall('.//fullName'):
                # findall never yields None; it is the element's text that can
                # be missing, so that is what has to be checked.
                if element.text is None:
                    print('Text element not found in XML.')
                    continue
                name_list.append(element.text.strip())
            # dict.fromkeys removes duplicates but keeps document order,
            # which a set would scramble.
            return list(dict.fromkeys(name_list))

        file_path = f"congress/{congress}/{bill}/{bill_number}/data.json"
        if not os.path.exists(file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        with open(file_path, 'r') as f:
            data = json.load(f)
        if 'cosponsors' not in data:
            print('No sponsors found for the bill:', congress, bill_number)
            return None
        return [sponsor['name'] for sponsor in data['cosponsors']]
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_cosponsor once per row of df_bill and store the results in a new 'cosponsors' column.
df_bill['cosponsors'] = df_bill.apply(lambda row: get_cosponsor(row['congress'], row['bill'], row['bill_number']), axis=1)

def get_cosponsor_id(congress, bill, bill_number):
    """Return the cosponsor identifiers recorded for one bill.

    Reads ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` when
    it exists and otherwise falls back to ``data.json`` in the same directory.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        XML source: the unique ``bioguideId`` texts under ``cosponsors`` in
        document order. JSON source: the ``thomas_id`` of every ``cosponsors``
        entry. ``None`` when neither file exists, when the cosponsor section
        is missing, or when any error occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if os.path.exists(xml_file_path):
            cosponsors = ET.parse(xml_file_path).getroot().find('.//cosponsors')
            if cosponsors is None:
                print('Sponsor element not found in XML:', congress, bill_number)
                return None
            id_list = []
            # Loop variable is not called `id`, to avoid shadowing the builtin.
            for element in cosponsors.findall('.//bioguideId'):
                # findall never yields None; the text is what can be missing.
                if element.text is None:
                    print('Text element not found in XML.')
                    continue
                id_list.append(element.text.strip())
            # dict.fromkeys removes duplicates but keeps document order,
            # which a set would scramble.
            return list(dict.fromkeys(id_list))

        file_path = f"congress/{congress}/{bill}/{bill_number}/data.json"
        if not os.path.exists(file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        with open(file_path, 'r') as f:
            data = json.load(f)
        if 'cosponsors' not in data:
            print('No sponsors found for the bill:', congress, bill_number)
            return None
        return [sponsor['thomas_id'] for sponsor in data['cosponsors']]
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_cosponsor_id once per row of df_bill and store the results in a new 'cosponsors_id' column.
df_bill['cosponsors_id'] = df_bill.apply(lambda row: get_cosponsor_id(row['congress'], row['bill'], row['bill_number']), axis=1)

def get_cosponsor_party(congress, bill, bill_number):
    """Return the distinct party labels among one bill's cosponsors.

    Only ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` is
    consulted; there is no JSON fallback.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        The unique ``party`` texts under ``cosponsors`` in order of first
        appearance, or ``None`` when the file or the cosponsor section is
        missing or any error occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if not os.path.exists(xml_file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        cosponsors = ET.parse(xml_file_path).getroot().find('.//cosponsors')
        if cosponsors is None:
            print('Sponsor element not found in XML:', congress, bill_number)
            return None
        party_list = []
        for element in cosponsors.findall('.//party'):
            # findall never yields None; the text is what can be missing.
            if element.text is None:
                print('Text element not found in XML.')
                continue
            party_list.append(element.text.strip())
        # dict.fromkeys de-duplicates with a stable, first-seen order.
        return list(dict.fromkeys(party_list))
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_cosponsor_party once per row of df_bill and store the results in a new 'cosponsors_party' column.
df_bill['cosponsors_party'] = df_bill.apply(lambda row: get_cosponsor_party(row['congress'], row['bill'], row['bill_number']), axis=1)

In [None]:
def get_sponsor(congress, bill, bill_number):
    """Return the sponsor name(s) recorded for one bill.

    Reads ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` when
    it exists and otherwise falls back to ``data.json`` in the same directory.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        XML source: a list of the unique ``fullName`` texts under ``sponsors``
        in document order. JSON source: the single value
        ``data['sponsor']['name']`` (not wrapped in a list). ``None`` when
        neither file exists, the sponsor section is missing, or any error
        occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if os.path.exists(xml_file_path):
            sponsors = ET.parse(xml_file_path).getroot().find('.//sponsors')
            if sponsors is None:
                print('Sponsor element not found in XML:', congress, bill_number)
                return None
            name_list = []
            for element in sponsors.findall('.//fullName'):
                # findall never yields None; the text is what can be missing.
                if element.text is None:
                    print('Text element not found in XML.')
                    continue
                name_list.append(element.text.strip())
            # dict.fromkeys removes duplicates but keeps document order,
            # which a set would scramble.
            return list(dict.fromkeys(name_list))

        file_path = f"congress/{congress}/{bill}/{bill_number}/data.json"
        if not os.path.exists(file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        with open(file_path, 'r') as f:
            data = json.load(f)
        return data['sponsor']['name']
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_sponsor once per row of df_bill and store the results in a new 'sponsors' column.
df_bill['sponsors'] = df_bill.apply(lambda row: get_sponsor(row['congress'], row['bill'], row['bill_number']), axis=1)

def get_sponsor_id(congress, bill, bill_number):
    """Return the sponsor identifier(s) recorded for one bill.

    Reads ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` when
    it exists and otherwise falls back to ``data.json`` in the same directory.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        XML source: a list of the unique ``bioguideId`` texts under
        ``sponsors`` in document order. JSON source: the single value
        ``data['sponsor']['thomas_id']`` (not wrapped in a list). ``None``
        when neither file exists, the sponsor section is missing, or any
        error occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if os.path.exists(xml_file_path):
            sponsors = ET.parse(xml_file_path).getroot().find('.//sponsors')
            if sponsors is None:
                print('Sponsor element not found in XML:', congress, bill_number)
                return None
            id_list = []
            # Loop variable is not called `id`, to avoid shadowing the builtin.
            for element in sponsors.findall('.//bioguideId'):
                # findall never yields None; the text is what can be missing.
                if element.text is None:
                    print('Text element not found in XML.')
                    continue
                id_list.append(element.text.strip())
            # dict.fromkeys removes duplicates but keeps document order,
            # which a set would scramble.
            return list(dict.fromkeys(id_list))

        file_path = f"congress/{congress}/{bill}/{bill_number}/data.json"
        if not os.path.exists(file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        with open(file_path, 'r') as f:
            data = json.load(f)
        return data['sponsor']['thomas_id']
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_sponsor_id once per row of df_bill and store the results in a new 'sponsors_id' column.
df_bill['sponsors_id'] = df_bill.apply(lambda row: get_sponsor_id(row['congress'], row['bill'], row['bill_number']), axis=1)

def get_sponsor_party(congress, bill, bill_number):
    """Return the distinct party labels among one bill's sponsors.

    Only ``congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml`` is
    consulted; there is no JSON fallback.

    Args:
        congress: Congress number, used as the first path component.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component.

    Returns:
        The unique ``party`` texts under ``sponsors`` in order of first
        appearance, or ``None`` when the file or the sponsor section is
        missing or any error occurs (the error is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if not os.path.exists(xml_file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None
        sponsors = ET.parse(xml_file_path).getroot().find('.//sponsors')
        if sponsors is None:
            print('Sponsor element not found in XML:', congress, bill_number)
            return None
        party_list = []
        for element in sponsors.findall('.//party'):
            # findall never yields None; the text is what can be missing.
            if element.text is None:
                print('Text element not found in XML.')
                continue
            party_list.append(element.text.strip())
        # dict.fromkeys de-duplicates with a stable, first-seen order.
        return list(dict.fromkeys(party_list))
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None
# Call get_sponsor_party once per row of df_bill and store the results in a new 'sponsors_party' column.
df_bill['sponsors_party'] = df_bill.apply(lambda row: get_sponsor_party(row['congress'], row['bill'], row['bill_number']), axis=1)

In [None]:
# Split the bills at the 112th/113th congress boundary. Both halves get new
# columns and in-place drops in later cells, so take explicit copies instead
# of boolean-mask slices of df_bill.
df_bill_112 = df_bill[df_bill['congress'] < 113].copy()
df_bill_113 = df_bill[df_bill['congress'] > 112].copy()

In [None]:
# One list per bill: cosponsor names first, the sponsor's name last.
# The cosponsor lookup yields None when nothing was found; treat that as an
# empty list instead of letting `None + [...]` raise a TypeError.
df_bill_112['sponsor_list'] = df_bill_112.apply(
    lambda row: (row['cosponsors'] if isinstance(row['cosponsors'], list) else []) + [row['sponsors']],
    axis=1,
)
df_bill_112

In [None]:
# One list per bill: cosponsor ids first, the sponsor's id last.
# The cosponsor lookup yields None when nothing was found; treat that as an
# empty list instead of letting `None + [...]` raise a TypeError.
df_bill_112['sponsorid_list'] = df_bill_112.apply(
    lambda row: (row['cosponsors_id'] if isinstance(row['cosponsors_id'], list) else []) + [row['sponsors_id']],
    axis=1,
)
df_bill_112

In [None]:
# Remove the four per-role name/id columns from df_bill_112.
df_bill_112 = df_bill_112.drop(
    columns=['cosponsors', 'sponsors', 'cosponsors_id', 'sponsors_id'],
)
df_bill_112

In [None]:
# Remove the two party columns from df_bill_112.
df_bill_112 = df_bill_112.drop(columns=['cosponsors_party', 'sponsors_party'])
df_bill_112

In [None]:
# Build a "<congress>-<bill_number>" key for each bill.
congress_text = df_bill_112['congress'].astype(str)
df_bill_112['conbill'] = congress_text + '-' + df_bill_112['bill_number']
df_bill_112

In [None]:
df_bill_112[df_bill_112['congress'] == 111]

In [None]:
# Build one sponsor-by-bill 0/1 matrix from df_bill and write it to CSV.
# NOTE(review): no visible cell adds 'conbill' or 'sponsor_list' to df_bill
# (they are added to df_bill_112 only), so this cell presumably dates from an
# earlier notebook state — confirm before re-running.
all_bills = df_bill['conbill'].unique()
all_sponsors = list(set(name for sublist in df_bill['sponsor_list'] for name in sublist))

# Create a matrix DataFrame with rows representing sponsors and columns representing bills
matrix = pd.DataFrame(0, index=all_sponsors, columns=all_bills)

# Iterate over each row in the original DataFrame and update the matrix DataFrame accordingly
for idx, row in df_bill.iterrows():
    bill_number = row['conbill']
    sponsors = row['sponsor_list']
    for sponsor in sponsors:
        matrix.loc[sponsor, bill_number] = 1

# The output name is fixed to congress 108 regardless of which rows df_bill holds.
matrix_filename = f'matrix_congress_108.csv'
matrix.to_csv(matrix_filename)

In [None]:
import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')

# Write one sponsor-by-bill 0/1 matrix per congress in df_bill_112.
for congress, sub_df in df_bill_112.groupby('congress'):
    all_bills = sub_df['bill_number'].unique()
    all_sponsors = list({name for sublist in sub_df['sponsor_list'] for name in sublist})
    matrix = pd.DataFrame(0, index=all_sponsors, columns=all_bills)

    # Flag every (sponsor, bill) pair that occurs in this congress.
    for bill_number, sponsors in zip(sub_df['bill_number'], sub_df['sponsor_list']):
        for sponsor in sponsors:
            matrix.loc[sponsor, bill_number] = 1

    matrix_filename = f'matrix_congress_{congress}.csv'
    matrix.to_csv(matrix_filename)
    print(f"Matrix for Congress {congress} saved as '{matrix_filename}'")

In [None]:
# Stack the current and historical legislator files, keeping id, party and
# full name, and discard rows that have no thomas_id.
cols = ['thomas_id', 'party', 'full_name']
df_legis = pd.read_csv('legislators-current.csv')[cols]
df_legis_his = pd.read_csv('legislators-historical.csv')[cols]
df_legis = pd.concat([df_legis, df_legis_his], ignore_index=True).dropna(subset=['thomas_id'])
df_legis

In [None]:
# Turn thomas_id into a string of digits, left-padded with zeros to width 5.
thomas_as_int = df_legis['thomas_id'].astype(int)
df_legis['thomas_id'] = thomas_as_int.astype(str).str.zfill(5)
df_legis

In [None]:
# Flatten df_bill_112 into one (congress, name, thomas_id) row per list entry.
# NOTE(review): a later cell reads files named like 'legislators_93.0.csv',
# which suggests 'congress' ends up as a float here; the row-by-row
# construction is kept as is so those names do not change.
df_legislators = pd.DataFrame(columns=['congress', 'name', 'thomas_id'])

for index, row in df_bill_112.iterrows():
    values_a = row['sponsor_list']
    values_b = row['sponsorid_list']
    congress = row['congress']

    # Names and ids are paired by position; zip stops at the shorter list.
    for value_a, value_b in zip(values_a, values_b):
        # len(df_legislators) is the next free integer label, so this appends one row.
        df_legislators.loc[len(df_legislators)] = [congress, value_a, value_b]

df_legislators

In [None]:
# Keep only the first occurrence of each identical row.
repeated_rows = df_legislators.duplicated()
df_legislators = df_legislators[~repeated_rows]
df_legislators

In [None]:
# Left-join the legislator file columns onto the sponsor rows by thomas_id.
df_l = df_legislators.merge(df_legis, on=['thomas_id'], how='left')
df_l

In [None]:
# Manual override: rows with thomas_id '02029' get party 'Republican'.
rows_02029 = df_l['thomas_id'] == '02029'
df_l.loc[rows_02029, 'party'] = 'Republican'

In [None]:
# full_name is not needed beyond this point.
df_l = df_l.drop(columns=['full_name'])
df_l

In [None]:
# Collapse the party labels to single letters; labels missing from the
# mapping become NaN through Series.map.
party_name = {
    'Democrat': 'D',
    'Republican': 'R',
    'Conservative': 'R',
    'Popular Democrat': 'D',
    'Independent': 'I',
    'Democrat-Liberal': 'D',
    'Republican-Conservative': 'R',
    'New Progressive': 'D',
}

party_codes = df_l['party'].map(party_name)
df_l['party'] = party_codes
df_l['party'].unique()

In [None]:
# Discard rows without a name.
df_l = df_l[df_l['name'].notna()]

In [None]:
# Write one legislators file per congress value found in df_l.
for congress, sub_df in df_l.groupby('congress'):
    legislators_path = f'legislators_{congress}.csv'
    sub_df.to_csv(legislators_path, index=False)
    print(f"file for Congress {congress} saved'")

In [None]:
df_bill_112

In [None]:
def extract_last_item(lst):
    """Return the final element of ``lst``, or ``None`` if ``lst`` is falsy (e.g. empty or ``None``)."""
    return lst[-1] if lst else None

# Take the last entry of each sponsorid_list as the bill's thomas_id. The cell
# that builds sponsorid_list appends the sponsor's id after the cosponsor ids,
# so the last entry is the sponsor.
df_bill_112['thomas_id'] = df_bill_112['sponsorid_list'].apply(extract_last_item)
df_bill_112

In [None]:
df_bill_112[df_bill_112['thomas_id'].isna()]

In [None]:
# Look up each bill's party through its thomas_id.
cols = ['thomas_id', 'party']
df = df_l[cols].drop_duplicates()
bills = df_bill_112.merge(df, on=['thomas_id'], how='left')
bills

In [None]:
# Strip the helper columns from the bill table.
bills = bills.drop(
    columns=['sponsor_list', 'sponsorid_list', 'bill', 'number', 'conbill'],
)
bills

In [None]:
# Drop the join key and relabel 'party' as 'sponsor_party'.
bills = bills.drop(columns=['thomas_id']).rename(columns={'party': 'sponsor_party'})
bills

In [None]:
# Cosponsors followed by sponsors; when there is no cosponsor list, the
# sponsors value is used on its own.
df_bill_113['sponsor_list'] = df_bill_113.apply(
    lambda row: row['sponsors'] if row['cosponsors'] is None else row['cosponsors'] + row['sponsors'],
    axis=1,
)
df_bill_113['sponsorid_list'] = df_bill_113.apply(
    lambda row: row['sponsors_id'] if row['cosponsors_id'] is None else row['cosponsors_id'] + row['sponsors_id'],
    axis=1,
)

df_bill_113

In [None]:
# Remove the per-role source columns now that the combined lists exist.
# errors='ignore': 'sponsorparty_list' is not created by any visible cell, and
# without it the drop raises KeyError for the missing label.
df_bill_113.drop(
    columns=['cosponsors', 'cosponsors_id', 'cosponsors_party', 'sponsors', 'sponsors_id', 'sponsors_party', 'sponsorparty_list'],
    inplace=True,
    errors='ignore',
)
df_bill_113

In [None]:
import warnings

# Suppress all warnings
warnings.filterwarnings('ignore')

# Write one sponsor-by-bill 0/1 matrix per congress in df_bill_113.
for congress, sub_df in df_bill_113.groupby('congress'):
    all_bills = sub_df['bill_number'].unique()
    all_sponsors = list({name for sublist in sub_df['sponsor_list'] for name in sublist})
    matrix = pd.DataFrame(0, index=all_sponsors, columns=all_bills)

    # Flag every (sponsor, bill) pair that occurs in this congress.
    for bill_number, sponsors in zip(sub_df['bill_number'], sub_df['sponsor_list']):
        for sponsor in sponsors:
            matrix.loc[sponsor, bill_number] = 1

    matrix_filename = f'matrix_congress_{congress}.csv'
    matrix.to_csv(matrix_filename)
    print(f"Matrix for Congress {congress} saved as '{matrix_filename}'")

In [None]:
# Reduce df_bill_113 to the columns needed to locate each bill's file.
cols = [
    'congress',
    'bill_number',
    'bill',
    'number',
]
df_bill_113 = df_bill_113.loc[:, cols]
df_bill_113

In [None]:
def get_cosponsor(congress, bill, bill_number, element='sponsors'):
    """Build a one-row-per-legislator table from one bill's XML status file.

    Args:
        congress: Congress number, used as the first path component and
            copied into every output row.
        bill: Bill-type directory name.
        bill_number: Bill identifier, used as the last directory component
            and copied into every output row.
        element: Tag of the container whose ``item`` children are read.
            Defaults to ``'sponsors'``, the tag this function has always
            searched for; pass ``'cosponsors'`` to collect cosponsors instead.

    Returns:
        A DataFrame with columns ``congress``, ``bill_number``, ``id``
        (``bioguideId``), ``name`` (``fullName``) and ``party``; ``None`` when
        the file or the container is missing or any error occurs (the error
        is printed).
    """
    try:
        xml_file_path = f"congress/{congress}/{bill}/{bill_number}/fdsys_billstatus.xml"
        if not os.path.exists(xml_file_path):
            print('Following bill could not be found:', congress, bill_number)
            return None

        container = ET.parse(xml_file_path).getroot().find(f'.//{element}')
        if container is None:
            print('Sponsor element not found in XML:', congress, bill_number)
            return None

        # The scalar congress / bill_number values are broadcast by pandas
        # across however many items the lists end up holding.
        cosponsor_data = {'congress': congress,
                          'bill_number': bill_number,
                          'id': [],
                          'name': [],
                          'party': []}
        for item in container.findall('.//item'):
            cosponsor_data['id'].append(item.find('bioguideId').text)
            cosponsor_data['name'].append(item.find('fullName').text)
            cosponsor_data['party'].append(item.find('party').text)

        return pd.DataFrame(cosponsor_data)
    except Exception as e:
        # Best-effort lookup: one unreadable bill must not abort the whole run.
        print('An error occurred while processing the bill:', congress, bill_number)
        print(e)
        return None

# Collect one frame per bill in df_bill_113, skipping bills whose lookup
# returned None, then stack them into a single table.
dfs = []
for index, row in df_bill_113.iterrows():
    cosponsor_df = get_cosponsor(row['congress'], row['bill'], row['bill_number'])
    if cosponsor_df is not None:
        dfs.append(cosponsor_df)

# NOTE(review): a later cell also uses df_combined_cosponsors, which no visible
# cell assigns; presumably this cell was re-run against the cosponsors element
# and the result stored under that name — confirm.
df_combined_sponsors = pd.concat(dfs, ignore_index=True)


#df_test = df_bill_113.apply(lambda row: get_cosponsor(row['congress'], row['bill'], row['bill_number']), axis=1)

In [None]:
# Only the last expression is displayed by the notebook.
# NOTE(review): df_combined_cosponsors is not assigned in any visible cell —
# confirm where it comes from before re-running.
df_combined_sponsors
df_combined_cosponsors

In [None]:
# Stack the sponsor and cosponsor tables into one frame with a fresh index.
frames_to_stack = [df_combined_sponsors, df_combined_cosponsors]
df_combined = pd.concat(frames_to_stack, ignore_index=True)
df_combined

In [None]:
# Keep only the first occurrence of each identical row.
repeated_rows = df_combined.duplicated()
df_combined = df_combined[~repeated_rows]
df_combined

In [None]:
# Legislator table for these congresses: congress, name, id and party only.
cols = [
    'congress',
    'name',
    'id',
    'party',
]
df_legislators = df_combined.loc[:, cols]
df_legislators

In [None]:
# Keep only the first occurrence of each identical row.
repeated_rows = df_legislators.duplicated()
df_legislators = df_legislators[~repeated_rows]
df_legislators

In [None]:
# Write one legislators file per congress value found in df_legislators.
for congress, sub_df in df_legislators.groupby('congress'):
    legislators_path = f'legislators_{congress}.csv'
    sub_df.to_csv(legislators_path, index=False)
    print(f"file for Congress {congress} saved'")

In [None]:
# Attach each bill's topic (from df_bills) to the sponsor rows.
cols = ['topic', 'congress', 'bill_number']
topics = df_bills[cols]
bill_topic = df_combined_sponsors.merge(topics, on=['congress', 'bill_number'], how='left')
bill_topic

In [None]:
# drop() returns a new frame; without assigning the result the 'id' and
# 'name' columns were never actually removed.
bill_topic = bill_topic.drop(columns=['id', 'name'])
bill_topic = bill_topic.rename(columns={'party': 'sponsor_party'})
bill_topic

In [None]:
# Keep only the first occurrence of each identical row.
repeated_rows = bill_topic.duplicated()
bill_topic = bill_topic[~repeated_rows]
bill_topic

In [None]:
# Write one bills file per congress value found in bill_topic.
for congress, sub_df in bill_topic.groupby('congress'):
    bills_path = f'bills_{congress}.csv'
    sub_df.to_csv(bills_path, index=False)
    print(f"file for bills {congress} saved'")

## ADD NOMINATE SCORES

In [None]:
# Load each per-topic table and store its index as the 'nameparty_id' column.
topic_dm = pd.read_csv('df_bills_topic_dm.tsv', sep='\t')
topic_dm['nameparty_id'] = topic_dm.index

topic_enr = pd.read_csv('df_bills_topic_enr.tsv', sep='\t')
topic_enr['nameparty_id'] = topic_enr.index

topic_gba = pd.read_csv('df_bills_topic_gba.tsv', sep='\t')
topic_gba['nameparty_id'] = topic_gba.index

topic_id = pd.read_csv('df_bills_topic_id.tsv', sep='\t')
topic_id['nameparty_id'] = topic_id.index

topic_irg = pd.read_csv('df_bills_topic_irg.tsv', sep='\t')
topic_irg['nameparty_id'] = topic_irg.index

topic_lp = pd.read_csv('df_bills_topic_lp.tsv', sep='\t')
topic_lp['nameparty_id'] = topic_lp.index

topic_sspw = pd.read_csv('df_bills_topic_sspw.tsv', sep='\t')
topic_sspw['nameparty_id'] = topic_sspw.index

In [None]:
topic_dm

In [None]:
# For every topic table: label it with its topic, keep the coordinate and key
# columns, and rename the coordinates to NOM1D / NOM2D.
cols = ['coord1D', 'coord2D', 'nameparty_id', 'topic']
nominate_names = {'coord1D': 'NOM1D', 'coord2D': 'NOM2D'}

topic_dm['topic'] = 'defense and military'
topic_dm = topic_dm[cols].rename(columns=nominate_names)

topic_enr['topic'] = 'environmental and natural resources'
topic_enr = topic_enr[cols].rename(columns=nominate_names)

topic_gba['topic'] = 'government budget and administration'
topic_gba = topic_gba[cols].rename(columns=nominate_names)

topic_id['topic'] = 'infrastructure and development'
topic_id = topic_id[cols].rename(columns=nominate_names)

topic_irg['topic'] = 'international relations and government'
topic_irg = topic_irg[cols].rename(columns=nominate_names)

topic_lp['topic'] = 'legislation and policy'
topic_lp = topic_lp[cols].rename(columns=nominate_names)

topic_sspw['topic'] = 'social services and public welfare'
topic_sspw = topic_sspw[cols].rename(columns=nominate_names)

In [None]:
# Stack the seven per-topic tables; each table keeps its own index values.
topic_frames = [
    topic_dm,
    topic_enr,
    topic_gba,
    topic_id,
    topic_irg,
    topic_lp,
    topic_sspw,
]
df_topic = pd.concat(topic_frames)
df_topic

In [None]:
# Member-vote table with topic labels, tab-separated.
member_votes_path = 'df_member_votes_fv_with_topics_3.tsv'
df_mv = pd.read_csv(member_votes_path, sep='\t')
df_mv

In [None]:
# Use 'topic' as the column name so it matches the key in df_topic.
df_mv = df_mv.rename(columns={'topic_with_coherence': 'topic'})
df_mv

In [None]:
# Left-join the per-topic NOM1D / NOM2D coordinates onto every vote row.
df_mv = df_mv.merge(df_topic, on=['nameparty_id', 'topic'], how='left')
df_mv

In [None]:
# Save the enriched vote table as TSV without the index column.
votes_out_path = "df_member_votes_fv_with_topics_3_nominate.tsv"
with open(votes_out_path, "w+") as f:
    df_mv.to_csv(f, sep="\t", index=None)

In [None]:
df_mv['NOM1D'].isna().sum()

In [None]:
# Member table, tab-separated.
members_path = 'df_members_fv.tsv'
df_members = pd.read_csv(members_path, sep='\t')
df_members

In [None]:
df_members['nameparty_id'].value_counts()

In [None]:
# Join on nameparty_id only, so a member gets one output row for each
# matching row of df_topic.
df_mem = df_members.merge(df_topic, on=['nameparty_id'], how='left')
df_mem

In [None]:
df_mem['NOM1D'].isna().sum()

In [None]:
# Save the per-topic member table as TSV without the index column.
members_out_path = "df_member_fv_per_topic_nominate_3.tsv"
with open(members_out_path, "w+") as f:
    df_mem.to_csv(f, sep="\t", index=None)

### NEW TOPICS

In [None]:
# Bill table carrying the newer topic labels, tab-separated.
bills_with_topics_path = "df_bills_fv_with_topics_3.tsv"
bills = pd.read_csv(bills_with_topics_path, sep='\t')
bills

In [None]:
# One row per distinct (congress, bill_number, topic_with_coherence).
cols = ['congress', 'bill_number', 'topic_with_coherence']
bills = bills[cols].drop_duplicates()
bills

In [None]:
# Trial run on the bills_93 file: attach the new topic label, drop the old 'topic'.
bill93 = pd.read_csv("bills_93.csv").merge(bills, on=['congress', 'bill_number'], how='left')
bill93 = bill93.drop(columns=['topic'])
bill93

In [None]:
# For congresses 93..118: attach the new topic label to each bills file, drop
# the old 'topic' column and write the result back. The row count is printed
# before and after the merge.
for congress_num in range(93, 119):
    filename = f"bills_{congress_num}.csv"
    bill_df = pd.read_csv(filename)
    print(len(bill_df))

    merged_df = bill_df.merge(bills, on=['congress', 'bill_number'], how='left')
    print(len(merged_df))

    merged_df = merged_df.drop(columns=['topic'])

    # Input and output share one name, so each file is overwritten in place.
    output_filename = filename
    merged_df.to_csv(output_filename, index=False)


In [None]:
# Load one per-topic table for inspection (this rebinds the name `df`).
enr_path = 'df_bills_topic_enr.tsv'
df = pd.read_csv(enr_path, sep='\t')
df

In [None]:
import pandas as pd

# Reload both legislator files with all of their columns.
df_legis = pd.read_csv('legislators-current.csv')
df_legis_his = pd.read_csv('legislators-historical.csv')
df_legis

In [None]:
# Legislators file for congress 93 (the file name carries a '.0' suffix).
df_93 = pd.read_csv('legislators_93.0.csv')
df_93

In [None]:
# Keep only 'type' and 'thomas_id' from both legislator files, stack them and
# discard rows that have no thomas_id.
cols = ['type', 'thomas_id']
df_legis, df_legis_his = df_legis[cols], df_legis_his[cols]
df_legis = pd.concat([df_legis, df_legis_his], ignore_index=True).dropna(subset=['thomas_id'])
df_legis

In [None]:
# Cast thomas_id to int.
thomas_as_int = df_legis['thomas_id'].astype(int)
df_legis['thomas_id'] = thomas_as_int
df_legis

In [None]:
# Left-join the 'type' column onto the congress-93 legislators by thomas_id.
df_93 = df_93.merge(df_legis, on=['thomas_id'], how='left')
df_93

In [None]:
# Save under the name without the '.0' suffix.
legislators_93_path = 'legislators_93.csv'
df_93.to_csv(legislators_93_path, index=False)

In [None]:
# Load the legislators files for congresses 94 to 112 (names carry a '.0'
# suffix). Each frame keeps its own module-level name because the following
# cells refer to df_94 ... df_112 individually.
df_94 = pd.read_csv('legislators_94.0.csv', sep=',')
df_95 = pd.read_csv('legislators_95.0.csv', sep=',')
df_96 = pd.read_csv('legislators_96.0.csv', sep=',')
df_97 = pd.read_csv('legislators_97.0.csv', sep=',')
df_98 = pd.read_csv('legislators_98.0.csv', sep=',')
df_99 = pd.read_csv('legislators_99.0.csv', sep=',')
df_100 = pd.read_csv('legislators_100.0.csv', sep=',')
df_101 = pd.read_csv('legislators_101.0.csv', sep=',')
df_102 = pd.read_csv('legislators_102.0.csv', sep=',')
df_103 = pd.read_csv('legislators_103.0.csv', sep=',')
df_104 = pd.read_csv('legislators_104.0.csv', sep=',')
df_105 = pd.read_csv('legislators_105.0.csv', sep=',')
df_106 = pd.read_csv('legislators_106.0.csv', sep=',')
df_107 = pd.read_csv('legislators_107.0.csv', sep=',')
df_108 = pd.read_csv('legislators_108.0.csv', sep=',')
df_109 = pd.read_csv('legislators_109.0.csv', sep=',')
df_110 = pd.read_csv('legislators_110.0.csv', sep=',')
df_111 = pd.read_csv('legislators_111.0.csv', sep=',')
df_112 = pd.read_csv('legislators_112.0.csv', sep=',')

In [None]:
# Left-join the df_legis columns onto every per-congress frame by thomas_id.
# Rows whose thomas_id has no match in df_legis keep NaN in the added columns.
df_94 = pd.merge(df_94, df_legis, on=['thomas_id'], how='left')
df_95 = pd.merge(df_95, df_legis, on=['thomas_id'], how='left')
df_96 = pd.merge(df_96, df_legis, on=['thomas_id'], how='left')
df_97 = pd.merge(df_97, df_legis, on=['thomas_id'], how='left')
df_98 = pd.merge(df_98, df_legis, on=['thomas_id'], how='left')
df_99 = pd.merge(df_99, df_legis, on=['thomas_id'], how='left')
df_100 = pd.merge(df_100, df_legis, on=['thomas_id'], how='left')
df_101 = pd.merge(df_101, df_legis, on=['thomas_id'], how='left')
df_102 = pd.merge(df_102, df_legis, on=['thomas_id'], how='left')
df_103 = pd.merge(df_103, df_legis, on=['thomas_id'], how='left')
df_104 = pd.merge(df_104, df_legis, on=['thomas_id'], how='left')
df_105 = pd.merge(df_105, df_legis, on=['thomas_id'], how='left')
df_106 = pd.merge(df_106, df_legis, on=['thomas_id'], how='left')
df_107 = pd.merge(df_107, df_legis, on=['thomas_id'], how='left')
df_108 = pd.merge(df_108, df_legis, on=['thomas_id'], how='left')
df_109 = pd.merge(df_109, df_legis, on=['thomas_id'], how='left')
df_110 = pd.merge(df_110, df_legis, on=['thomas_id'], how='left')
df_111 = pd.merge(df_111, df_legis, on=['thomas_id'], how='left')
df_112 = pd.merge(df_112, df_legis, on=['thomas_id'], how='left')

In [None]:
# Report, per congress, how many rows still have no 'type' after the merge.
# As bare expressions only the last of these counts was ever displayed by the
# notebook; printing makes all nineteen visible.
for congress_num, frame in (
    (94, df_94), (95, df_95), (96, df_96), (97, df_97), (98, df_98),
    (99, df_99), (100, df_100), (101, df_101), (102, df_102), (103, df_103),
    (104, df_104), (105, df_105), (106, df_106), (107, df_107), (108, df_108),
    (109, df_109), (110, df_110), (111, df_111), (112, df_112),
):
    print(congress_num, frame['type'].isna().sum())

In [None]:
# Save every merged frame under the name without the '.0' suffix.
for congress_num, frame in (
    (94, df_94), (95, df_95), (96, df_96), (97, df_97), (98, df_98),
    (99, df_99), (100, df_100), (101, df_101), (102, df_102), (103, df_103),
    (104, df_104), (105, df_105), (106, df_106), (107, df_107), (108, df_108),
    (109, df_109), (110, df_110), (111, df_111), (112, df_112),
):
    frame.to_csv(f'legislators_{congress_num}.csv', index=False)

In [None]:
# Load the legislators files for congresses 113 to 118. Each frame keeps its
# own module-level name because the following cells refer to them individually.
df_113 = pd.read_csv('legislators_113.csv', sep=',')
df_114 = pd.read_csv('legislators_114.csv', sep=',')
df_115 = pd.read_csv('legislators_115.csv', sep=',')
df_116 = pd.read_csv('legislators_116.csv', sep=',')
df_117 = pd.read_csv('legislators_117.csv', sep=',')
df_118 = pd.read_csv('legislators_118.csv', sep=',')

In [None]:
# Derive 'type' from the first four characters of the name, lower-cased.
# NOTE(review): four characters would keep the dot of a prefix such as "Rep.";
# confirm this matches the 'type' values merged in for congresses 93-112.
for frame in (df_113, df_114, df_115, df_116, df_117, df_118):
    frame['type'] = frame['name'].str[:4].str.lower()

In [None]:
# Write each frame back to the file it was loaded from.
for congress_num, frame in (
    (113, df_113), (114, df_114), (115, df_115),
    (116, df_116), (117, df_117), (118, df_118),
):
    frame.to_csv(f'legislators_{congress_num}.csv', index=False)