<a href="https://colab.research.google.com/github/pathakharsh123/Justice-Prediction-System/blob/master/AI_powered_justice_prediction_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pandas
!pip install numpy
!pip install scikit-learn

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.calibration import CalibratedClassifierCV

# Just for visuals
pd.set_option('display.max_colwidth', None)



# 1. Dataset Preparation

## Load Pre-processed Dataset

In [None]:
df = pd.read_pickle('/content/drive/MyDrive/Colab Notebooks/gsc/Final_dataset.pkl')
df.rename(columns={'Facts': 'facts'}, inplace=True)
df.drop(columns=['index'], inplace=True)
df.reset_index(inplace=True)

print(f'There are {len(df)} cases.')

There are 3464 cases.


In [None]:
# Create a mirrored case for each case, where the parties are swapped to prevent favoring first_party
df_list = df.values.tolist()
result = []
for row in df_list:
    result.append(row[1:])
    mirrored_row = row.copy()
    #  first_party is at index=4, second=5, winner_index=7
    mirrored_row[4] = row[5]
    mirrored_row[5] = row[4]
    mirrored_row[7] = 1-mirrored_row[7]
    result.append(mirrored_row[1:])
df2 = pd.DataFrame(result)
df2.rename(columns={
    0: 'ID',
    1: 'name',
    2: 'href',
    3: 'first_party',
    4: 'second_party',
    5: 'winning_party',
    6: 'winner_index',
    7: 'facts',
}, inplace=True)
df = df2
df.reset_index(inplace=True)

In [None]:
avg_char = df['facts'].apply(lambda x: len(str(x))).mean()
print(f'Average facts character length: {avg_char:.0f}')

avg_word = df['facts'].apply(lambda x: len(str(x).split())).mean()
print(f'Average facts word length: {avg_word:.0f}')

del avg_char, avg_word

Average facts character length: 1179
Average facts word length: 189


In [None]:
display(df.head(n=3))

Unnamed: 0,index,ID,name,href,first_party,second_party,winning_party,winner_index,facts
0,0,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,Jane Roe,Henry Wade,Jane Roe,0,"In 1970, Jane Roe (a fictional name used in court documents to protect the plaintiff’s identity) filed a lawsuit against Henry Wade, the district attorney of Dallas County, Texas, where she resided, challenging a Texas law making abortion illegal except by a doctor’s orders to save a woman’s life. In her lawsuit, Roe alleged that the state laws were unconstitutionally vague and abridged her right of personal privacy, protected by the First, Fourth, Fifth, Ninth, and Fourteenth Amendments."
1,1,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,Henry Wade,Jane Roe,Jane Roe,1,"In 1970, Jane Roe (a fictional name used in court documents to protect the plaintiff’s identity) filed a lawsuit against Henry Wade, the district attorney of Dallas County, Texas, where she resided, challenging a Texas law making abortion illegal except by a doctor’s orders to save a woman’s life. In her lawsuit, Roe alleged that the state laws were unconstitutionally vague and abridged her right of personal privacy, protected by the First, Fourth, Fifth, Ninth, and Fourteenth Amendments."
2,2,50613,Stanley v. Illinois,https://api.oyez.org/cases/1971/70-5014,"Peter Stanley, Sr.",Illinois,Stanley,0,"Joan Stanley had three children with Peter Stanley. The Stanleys never married, but lived together off and on for 18 years. When Joan died, the State of Illinois took the children. Under Illinois law, unwed fathers were presumed unfit parents regardless of their actual fitness and their children became wards of the state. Peter appealed the decision, arguing that the Illinois law violated the Equal Protection Clause of the Fourteenth Amendment because unwed mothers were not deprived of their children without a showing that they were actually unfit parents. The Illinois Supreme Court rejected Stanley’s Equal Protection claim, holding that his actual fitness as a parent was irrelevant because he and the children’s mother were unmarried."


In [None]:
print(f'There are {len(df)} cases.')
print(f'There are {len(df[df["winner_index"]==0])} rows for class 0.')
print(f'There are {len(df[df["winner_index"]==1])} rows for class 1.')

There are 6928 cases.
There are 3464 rows for class 0.
There are 3464 rows for class 1.


In [None]:
# Facts character stats
df['facts'].apply(lambda x: len(str(x))).describe()

count    6928.000000
mean     1179.302252
std       556.295521
min        95.000000
25%       784.000000
50%      1112.500000
75%      1496.000000
max      6108.000000
Name: facts, dtype: float64

In [None]:
# Facts word stats
df['facts'].apply(lambda x: len(str(x).split())).describe()

count    6928.000000
mean      188.618938
std        91.490377
min        13.000000
25%       125.000000
50%       176.000000
75%       239.000000
max       974.000000
Name: facts, dtype: float64

# 2. AutoJudge
Given the neutral absolute facts, predict the decision of the case.

In [None]:
# # Perform an 80-20 split for training and testing data
X_train_party1_text, X_test_party1_text, \
X_train_party2_text, X_test_party2_text, \
X_train_facts_text, X_test_facts_text, \
y_train, y_test = train_test_split(
    df['first_party'],
    df['second_party'],
    df['facts'],
    df['winner_index'],
    test_size=0.2,
    stratify=df['winner_index']
)

In [None]:
# TF-IDF Feature Extraction
vectorizer = TfidfVectorizer()
X_train_facts = vectorizer.fit_transform(X_train_facts_text)
X_test_facts = vectorizer.transform(X_test_facts_text)
X_train_party1 = vectorizer.transform(X_train_party1_text)
X_test_party1 = vectorizer.transform(X_test_party1_text)
X_train_party2 = vectorizer.transform(X_train_party2_text)
X_test_party2 = vectorizer.transform(X_test_party2_text)

X_train = np.concatenate([X_train_party1.todense(), X_train_party2.todense(), X_train_facts.todense()], axis=1)
X_test = np.concatenate([X_test_party1.todense(), X_test_party2.todense(), X_test_facts.todense()], axis=1)

del X_train_facts, X_train_party1, X_train_party2
del X_test_facts, X_test_party1, X_test_party2

## Perceptron

In [None]:
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Perceptron
model_perceptron = Perceptron(
    alpha=0.0001,
    max_iter=5,
    n_iter_no_change=5,
    penalty='l1',
    tol=1e-3,
    validation_fraction=0.1
)
model_perceptron.fit(X_train, y_train)

y_train_pred = model_perceptron.predict(X_train)
y_test_pred = model_perceptron.predict(X_test)

print('Perceptron - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('Perceptron - Test\n', classification_report(y_test, y_test_pred, zero_division=0))



Perceptron - Train
               precision    recall  f1-score   support

           0       0.98      0.23      0.37      2771
           1       0.56      0.99      0.72      2771

    accuracy                           0.61      5542
   macro avg       0.77      0.61      0.55      5542
weighted avg       0.77      0.61      0.55      5542

Perceptron - Test
               precision    recall  f1-score   support

           0       0.90      0.18      0.29       693
           1       0.54      0.98      0.70       693

    accuracy                           0.58      1386
   macro avg       0.72      0.58      0.50      1386
weighted avg       0.72      0.58      0.50      1386



## SVM

In [None]:
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# SVM
model_svm = LinearSVC(
    max_iter=5,
    C=0.1,
    intercept_scaling=0.1,
    loss='squared_hinge',
    penalty='l2',
    tol=1e-2
)
model_svm.fit(X_train, y_train)

y_train_pred = model_svm.predict(X_train)
y_test_pred = model_svm.predict(X_test)

print('SVM - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('SVM - Test\n', classification_report(y_test, y_test_pred, zero_division=0))



SVM - Train
               precision    recall  f1-score   support

           0       0.90      0.91      0.91      2771
           1       0.91      0.90      0.90      2771

    accuracy                           0.91      5542
   macro avg       0.91      0.91      0.91      5542
weighted avg       0.91      0.91      0.91      5542

SVM - Test
               precision    recall  f1-score   support

           0       0.61      0.63      0.62       693
           1       0.61      0.59      0.60       693

    accuracy                           0.61      1386
   macro avg       0.61      0.61      0.61      1386
weighted avg       0.61      0.61      0.61      1386



## Logistic Regression

In [None]:
# Logistic Regression
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

model_log_reg = LogisticRegression()
model_log_reg.fit(X_train, y_train)

y_train_pred = model_log_reg.predict(X_train)
y_test_pred = model_log_reg.predict(X_test)

print('Logistic Regression - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('Logistic Regression - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

Logistic Regression - Train
               precision    recall  f1-score   support

           0       0.91      0.92      0.91      2771
           1       0.92      0.91      0.91      2771

    accuracy                           0.91      5542
   macro avg       0.91      0.91      0.91      5542
weighted avg       0.91      0.91      0.91      5542

Logistic Regression - Test
               precision    recall  f1-score   support

           0       0.61      0.62      0.62       693
           1       0.62      0.60      0.61       693

    accuracy                           0.61      1386
   macro avg       0.61      0.61      0.61      1386
weighted avg       0.61      0.61      0.61      1386



## Naive Bayes

In [None]:
# Naive Bayes
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

model_naive_bayes = MultinomialNB(
    alpha=3
)
model_naive_bayes.fit(X_train, y_train)

y_train_pred = model_naive_bayes.predict(X_train)
y_test_pred = model_naive_bayes.predict(X_test)

print('Naive Bayes - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('Naive Bayes - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

Naive Bayes - Train
               precision    recall  f1-score   support

           0       0.85      0.85      0.85      2771
           1       0.85      0.86      0.85      2771

    accuracy                           0.85      5542
   macro avg       0.85      0.85      0.85      5542
weighted avg       0.85      0.85      0.85      5542

Naive Bayes - Test
               precision    recall  f1-score   support

           0       0.60      0.60      0.60       693
           1       0.60      0.61      0.60       693

    accuracy                           0.60      1386
   macro avg       0.60      0.60      0.60      1386
weighted avg       0.60      0.60      0.60      1386



## Multi-layer Perceptron

In [None]:
# MLP
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

model_mlp = MLPClassifier(
    early_stopping=True,
    beta_2=0,
    max_iter=10,
)
model_mlp.fit(X_train, y_train)

y_train_pred = model_mlp.predict(X_train)
y_test_pred = model_mlp.predict(X_test)

print('MLP - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('MLP - Test\n', classification_report(y_test, y_test_pred, zero_division=0))



MLP - Train
               precision    recall  f1-score   support

           0       0.93      0.90      0.91      2771
           1       0.90      0.94      0.92      2771

    accuracy                           0.92      5542
   macro avg       0.92      0.92      0.92      5542
weighted avg       0.92      0.92      0.92      5542

MLP - Test
               precision    recall  f1-score   support

           0       0.65      0.61      0.63       693
           1       0.63      0.67      0.65       693

    accuracy                           0.64      1386
   macro avg       0.64      0.64      0.64      1386
weighted avg       0.64      0.64      0.64      1386



## K-Nearest Neighbors

In [None]:
# KNN
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

model_knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
model_knn.fit(X_train, y_train)

y_train_pred = model_knn.predict(X_train)
y_test_pred = model_knn.predict(X_test)

print('KNN - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('KNN - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

KNN - Train
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2771
           1       1.00      1.00      1.00      2771

    accuracy                           1.00      5542
   macro avg       1.00      1.00      1.00      5542
weighted avg       1.00      1.00      1.00      5542

KNN - Test
               precision    recall  f1-score   support

           0       0.68      0.74      0.71       693
           1       0.71      0.65      0.68       693

    accuracy                           0.70      1386
   macro avg       0.70      0.70      0.69      1386
weighted avg       0.70      0.70      0.69      1386



## Calibrated Classifier

In [None]:
# Calibrated Classifier
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

model_calibrated_classifier = CalibratedClassifierCV(
    method='isotonic'
)
model_calibrated_classifier.fit(X_train, y_train)

y_train_pred = model_calibrated_classifier.predict(X_train)
y_test_pred = model_calibrated_classifier.predict(X_test)

print('Calibrated Classifier - Train\n', classification_report(y_train, y_train_pred, zero_division=0))
print('Calibrated Classifier - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

Calibrated Classifier - Train
               precision    recall  f1-score   support

           0       0.94      0.95      0.94      2771
           1       0.95      0.94      0.94      2771

    accuracy                           0.94      5542
   macro avg       0.94      0.94      0.94      5542
weighted avg       0.94      0.94      0.94      5542

Calibrated Classifier - Test
               precision    recall  f1-score   support

           0       0.61      0.65      0.63       693
           1       0.63      0.59      0.61       693

    accuracy                           0.62      1386
   macro avg       0.62      0.62      0.62      1386
weighted avg       0.62      0.62      0.62      1386



## Create Ensemble

In [None]:
models = []
models.append(('perceptron', model_perceptron))
models.append(('svm', model_svm))
models.append(('logistic_regression', model_log_reg))
models.append(('naive_bayes', model_naive_bayes))
models.append(('multi_layer_perceptron', model_mlp))
models.append(('k_nearest_neighbors', model_knn))
models.append(('calibrated_classifier', model_calibrated_classifier))

In [None]:
# Ensemble
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

ensemble = VotingClassifier(models, voting='hard')
ensemble.fit(X_train, y_train)

y_train_pred = ensemble.predict(X_train)
y_test_pred = ensemble.predict(X_test)

print('Ensemble - Train\n', classification_report(y_train, y_train_pred, zero_division=0))

print('Ensemble - Test\n', classification_report(y_test, y_test_pred, zero_division=0))



Ensemble - Train
               precision    recall  f1-score   support

           0       0.93      0.92      0.92      2771
           1       0.92      0.93      0.93      2771

    accuracy                           0.93      5542
   macro avg       0.93      0.93      0.93      5542
weighted avg       0.93      0.93      0.93      5542

Ensemble - Test
               precision    recall  f1-score   support

           0       0.62      0.62      0.62       693
           1       0.62      0.61      0.62       693

    accuracy                           0.62      1386
   macro avg       0.62      0.62      0.62      1386
weighted avg       0.62      0.62      0.62      1386



In [None]:
import pickle
filename = 'ensemble_model.pkl'

with open(filename, 'wb') as file:
    pickle.dump(ensemble, file)
print(ensemble)

VotingClassifier(estimators=[('perceptron',
                              Perceptron(max_iter=5, penalty='l1')),
                             ('svm',
                              LinearSVC(C=0.1, intercept_scaling=0.1,
                                        max_iter=5, tol=0.01)),
                             ('logistic_regression', LogisticRegression()),
                             ('naive_bayes', MultinomialNB(alpha=3)),
                             ('multi_layer_perceptron',
                              MLPClassifier(beta_2=0, early_stopping=True,
                                            max_iter=10)),
                             ('k_nearest_neighbors',
                              KNeighborsClassifier(n_neighbors=3,
                                                   weights='distance')),
                             ('calibrated_classifier',
                              CalibratedClassifierCV(method='isotonic'))])


## Demo

In [None]:
def predict(party1, party2, facts):
    X_party1 = vectorizer.transform([party1]).todense()
    X_party2 = vectorizer.transform([party2]).todense()
    X_facts = vectorizer.transform([facts]).todense()

    # Convert to NumPy arrays
    X_party1 = np.asarray(X_party1)
    X_party2 = np.asarray(X_party2)
    X_facts = np.asarray(X_facts)

    # Concatenate along the correct axis
    X = np.concatenate([X_party1, X_party2, X_facts], axis=1)
    return ensemble.predict(X)[0]


In [None]:
out = predict('Jake', 'John', 'John was assaulted by Jake at gun point. He bled severely while Jake escaped the crime scene. The entire footage was captured by a CCTV of a nearby gas station.')
print(f'Expecting 1 but got {out}')

Expecting 1 but got 1


In [None]:
out = predict('The Florida Bar', 'Lanell Williams-Yulee', """During her candidacy for County Court Judge in Hillsborough County, Florida, Lanell Williams-Yulee personally solicited campaign contributions. She stated that she served as the "community Public Defender" – although her title was "assistant public defender" – and inaccurately stated in the media that there was no incumbent in the judicial race for which she was running. The Florida Bar filed a complaint against Williams-Yulee and alleged that her actions during the campaign violated the rules regulating The Florida Bar. A referee was appointed who suggested that Williams-Yulee receive a public reprimand. Williams-Yulee appealed the referee's finding, and the Supreme Court of Florida held that Williams-Yulee violated bar rules for directly soliciting funds for her judicial campaign. Williams-Yulee appealed and claimed that The Florida Bar rule prohibiting a candidate from personal solicitation of funds violated the First Amendment protection of freedom of speech.""")
print(f'Expecting 0 but got {out}')

Expecting 0 but got 0


In [None]:
out = predict('Rob Bonta, Attorney General of California', 'Americans for Prosperity Foundation', """The California Attorney General’s office has a policy requiring charities to provide the state, on a confidential basis, information about their major donors, purportedly to help the state protect consumers from fraud and the misuse of their charitable contributions. Petitioner Americans for Prosperity (and the petitioner in the consolidated case, Thomas More Law Center) either failed to file or filed redacted lists of their major donors with the California Attorney General’s office, despite filing complete lists with the federal Internal Revenue Service, as required by federal law. In response to demands by the California Attorney General that they file the lists, the organizations filed a lawsuit alleging that the filing requirement unconstitutionally burdened their First Amendment right to free association by deterring individuals from financially supporting them. The organizations provided evidence that although the state is required to keep donor names private, the state’s database was vulnerable to hacking, and many donor names were repeatedly released to the public. Based in part on this finding, the district court granted both organizations’ motions for a preliminary injunction and then ultimately found for them after a trial, holding that the organizations and their donors were entitled to First Amendment protection under the principles established in the Supreme Court’s decision in NAACP v. Alabama. In so holding, the court reasoned that the government’s filing demands were not the “least restrictive means” of obtaining the information and thus did not satisfy “strict scrutiny.” A panel of the you.S. Court of Appeals for the Ninth Circuit reversed, based on its conclusion that “exacting scrutiny” rather than “strict scrutiny” was the appropriate standard, and “exacting scrutiny” requires that the government show that the disclosure and reporting requirements are justified by a compelling government interest and that the legislation is narrowly tailored to serve that interest. The Ninth Circuit denied the petition for a rehearing en banc.""")
print(f'Expecting 1 but got {out}')

Expecting 1 but got 1


In [None]:
out = predict('Max', 'University of Washington Law School', """Max was denied admission to the University of Washington Law School despite test scores that were higher than some of the minorities admitted. Max then successfully asked a trial court to require the school to admit him. On appeal, the Washington Supreme Court reversed, upholding the school's decision to deny Max admission. The you.S. Supreme Court considered the case as Max was entering his final year of school.""")
print(f'Expecting 0 but got {out}')

Expecting 0 but got 1


#WEB INTERFACE FOR THE ML MODEL

In [None]:
!pip install flask-ngrok
!pip install pyngrok==4.1.1
!ngrok authtoken 2c2PyGDaCqRLn9yb6C2AXpTwvyV_492CBNYwhhK8w1aGdj3jW

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
%cd /content/drive/MyDrive/Colab Notebooks/gsc

/content/drive/MyDrive/Colab Notebooks/gsc


In [None]:
from  flask_ngrok import run_with_ngrok
from flask import Flask, request, render_template
import pickle
import numpy as np

In [None]:


app = Flask(__name__)


run_with_ngrok(app)

model = pickle.load(open('trained_model.pkl', 'rb'))

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict',methods=['POST'])
def predict():
    party1 = request.form['firstParty']
    party2 = request.form['secondParty']
    facts = request.form['caseDescription']

    X_party1 = vectorizer.transform([party1]).todense()
    X_party2 = vectorizer.transform([party2]).todense()
    X_facts = vectorizer.transform([facts]).todense()

    # Convert to NumPy arrays
    X_party1 = np.asarray(X_party1)
    X_party2 = np.asarray(X_party2)
    X_facts = np.asarray(X_facts)

    # Concatenate along the correct axis
    X = np.concatenate([X_party1, X_party2, X_facts], axis=1)
    prediction = model.predict(X)[0]
    winner = "First Party" if prediction == 0 else "Second Party"
    return render_template('index.html', output='Winner: {}'.format(winner))



app.run()