<a href="https://colab.research.google.com/github/JQmiracle/BA_865_Final_Project/blob/main/Startcode_decision_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
# !pip install pandas
# !pip install numpy
# !pip install scikit-learn

import numpy as np
import pandas as pd

from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC

# Just for visuals
pd.set_option('display.max_colwidth', None)

# Dataset Preparation

In [None]:
# Mount Google Drive at /content/drive; the dataset pickle loaded in the next
# cell is read from a path under this mount point.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so this cell is expected to fail elsewhere — confirm before reuse.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the raw case table and normalise it in a single chain:
#   * 'Facts' is renamed to lower-case 'facts'
#   * the stale 'index' column stored in the pickle is dropped
#   * reset_index() then re-creates a fresh 'index' column in first position
# NOTE: unpickling can execute arbitrary code, so only load pickles from a trusted source.
df = (
    pd.read_pickle('/content/drive/MyDrive/BA865_Team/task1_data.pkl')
    .rename(columns={'Facts': 'facts'})
    .drop(columns=['index'])
    .reset_index()
)

print(f'There are {len(df)} cases.')

There are 3464 cases.


In [None]:
# Augment the data with a party-swapped twin of every case so that a model
# cannot simply learn to favour whichever side is listed first.
# Position 0 of each raw row is the 'index' column created by the earlier
# reset_index(); it is sliced off before a row is stored.
df_list = df.values.tolist()
result = []
for row in df_list:
    mirrored_row = list(row)
    # Positions in the raw row: first_party=4, second_party=5, winner_index=7
    mirrored_row[4], mirrored_row[5] = row[5], row[4]
    mirrored_row[7] = 1 - row[7]
    # Keep each original immediately followed by its mirror
    result.extend((row[1:], mirrored_row[1:]))

# Rows were flattened to plain lists, so restore the column names by position
# and add a fresh 'index' column. df and df2 refer to the same frame.
df2 = (
    pd.DataFrame(result)
    .rename(columns=dict(enumerate([
        'ID',
        'name',
        'href',
        'first_party',
        'second_party',
        'winning_party',
        'winner_index',
        'facts',
    ])))
    .reset_index()
)
df = df2

In [None]:
# Quick size check of the 'facts' text: mean length in characters and in
# whitespace-separated tokens. Values are coerced with str() first so that
# non-string entries are measured by their string form.
facts_as_text = df['facts'].map(str)

mean_chars = facts_as_text.map(len).mean()
print(f'Average facts character length: {mean_chars:.0f}')

mean_words = facts_as_text.map(lambda text: len(text.split())).mean()
print(f'Average facts word length: {mean_words:.0f}')

# Temporaries are not needed beyond this cell
del facts_as_text, mean_chars, mean_words

Average facts character length: 1179
Average facts word length: 189


In [None]:
# Preview the first three rows; rows 0 and 1 are an original case and its mirror
display(df.head(3))

Unnamed: 0,index,ID,name,href,first_party,second_party,winning_party,winner_index,facts
0,0,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,Jane Roe,Henry Wade,Jane Roe,0,"In 1970, Jane Roe (a fictional name used in court documents to protect the plaintiff’s identity) filed a lawsuit against Henry Wade, the district attorney of Dallas County, Texas, where she resided, challenging a Texas law making abortion illegal except by a doctor’s orders to save a woman’s life. In her lawsuit, Roe alleged that the state laws were unconstitutionally vague and abridged her right of personal privacy, protected by the First, Fourth, Fifth, Ninth, and Fourteenth Amendments."
1,1,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,Henry Wade,Jane Roe,Jane Roe,1,"In 1970, Jane Roe (a fictional name used in court documents to protect the plaintiff’s identity) filed a lawsuit against Henry Wade, the district attorney of Dallas County, Texas, where she resided, challenging a Texas law making abortion illegal except by a doctor’s orders to save a woman’s life. In her lawsuit, Roe alleged that the state laws were unconstitutionally vague and abridged her right of personal privacy, protected by the First, Fourth, Fifth, Ninth, and Fourteenth Amendments."
2,2,50613,Stanley v. Illinois,https://api.oyez.org/cases/1971/70-5014,"Peter Stanley, Sr.",Illinois,Stanley,0,"Joan Stanley had three children with Peter Stanley. The Stanleys never married, but lived together off and on for 18 years. When Joan died, the State of Illinois took the children. Under Illinois law, unwed fathers were presumed unfit parents regardless of their actual fitness and their children became wards of the state. Peter appealed the decision, arguing that the Illinois law violated the Equal Protection Clause of the Fourteenth Amendment because unwed mothers were not deprived of their children without a showing that they were actually unfit parents. The Illinois Supreme Court rejected Stanley’s Equal Protection claim, holding that his actual fitness as a parent was irrelevant because he and the children’s mother were unmarried."


# Predict the Decision of the Case

In [None]:
# Perform an 80-20 train/test split, grouped by case ID.
#
# Every case appears twice in df: the original row and its party-swapped mirror,
# both carrying the same 'ID' and identical 'facts'. A plain row-level split can
# put one copy in train and the other in test, which leaks test cases into
# training and inflates the reported scores. Splitting on 'ID' keeps both copies
# of a case on the same side.
#
# Notes:
#   * test_size is a fraction of the groups (case IDs), not of the rows.
#   * random_state is fixed so the split is reproducible across kernel restarts.
#   * Explicit stratification is not needed as long as winner_index is binary:
#     each original/mirror pair contributes one row to each class.
group_splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_rows, test_rows = next(
    group_splitter.split(df, df['winner_index'], groups=df['ID'])
)
df_train, df_test = df.iloc[train_rows], df.iloc[test_rows]

X_train_party1_text, X_test_party1_text = df_train['first_party'], df_test['first_party']
X_train_party2_text, X_test_party2_text = df_train['second_party'], df_test['second_party']
X_train_facts_text, X_test_facts_text = df_train['facts'], df_test['facts']
y_train, y_test = df_train['winner_index'], df_test['winner_index']

# No case ID may appear on both sides of the split
assert set(df_train['ID']).isdisjoint(df_test['ID']), 'case leaked across train/test'

del group_splitter, train_rows, test_rows, df_train, df_test

In [None]:
# TF-IDF Feature Extraction
#
# The vocabulary and IDF weights are learned from the training facts only; the
# same fitted vectorizer is then reused for the test facts and for both party
# name columns, so all three feature groups share one vocabulary.
vectorizer = TfidfVectorizer()
X_train_facts = vectorizer.fit_transform(X_train_facts_text)
X_test_facts = vectorizer.transform(X_test_facts_text)
X_train_party1 = vectorizer.transform(X_train_party1_text)
X_test_party1 = vectorizer.transform(X_test_party1_text)
X_train_party2 = vectorizer.transform(X_train_party2_text)
X_test_party2 = vectorizer.transform(X_test_party2_text)

# Feature layout per row: [party1 | party2 | facts].
# .toarray() is used instead of .todense(): todense() returns np.matrix, which
# current scikit-learn estimators reject with a TypeError, whereas toarray()
# yields a regular ndarray holding the same values.
X_train = np.concatenate(
    [X_train_party1.toarray(), X_train_party2.toarray(), X_train_facts.toarray()],
    axis=1,
)
X_test = np.concatenate(
    [X_test_party1.toarray(), X_test_party2.toarray(), X_test_facts.toarray()],
    axis=1,
)

# Release the sparse intermediates; only the dense X_train / X_test are used below
del X_train_facts, X_train_party1, X_train_party2
del X_test_facts, X_test_party1, X_test_party2

In [None]:
# Perceptron
#
# Linear baseline trained on the concatenated TF-IDF features.
# random_state pins the shuffling of the training data between epochs, so the
# reported metrics are reproducible across kernel restarts.
model_perceptron = Perceptron(
    alpha=0.0001,
    max_iter=5,
    n_iter_no_change=5,
    penalty='l1',
    tol=1e-3,
    validation_fraction=0.1,  # only consulted when early_stopping=True, which is left at its default here
    random_state=42,
)
model_perceptron.fit(X_train, y_train)

# Score on both splits so over-fitting shows up as a train/test gap
y_train_pred = model_perceptron.predict(X_train)
y_test_pred = model_perceptron.predict(X_test)

print('Perceptron - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('Perceptron - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

# Simple Neural Network