In [114]:
import numpy as np
import pandas as pd

#import pickle to dump extracted features 
import pickle

In [121]:
# Read the term/sentence table from disk, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='./terms_with_sentences.csv')
df.head(n=5)

Unnamed: 0,Terms,Sents
0,acquittal,
1,judgement,
2,defendant,
3,affidavit,mattis nunc sed blandit libero volutpat at tel...
4,appellate,it before a notary or officer having authority...


In [122]:
# Drop every row that has a NaN in any column.
# Rebind the name instead of using inplace=True: downstream cells still see
# the cleaned frame under `df`, but the object returned by read_csv is not
# mutated behind the back of anything that already displayed or aliased it.
df = df.dropna()

In [123]:
# Count the missing values remaining in each column.
df.isna().sum(axis=0)

Terms    0
Sents    0
dtype: int64

In [124]:
# Number of rows per distinct value of the 'Terms' column.
df.loc[:, 'Terms'].value_counts()

court                     52
jury                      16
judge                     15
defendant                 13
evidence                  13
lawsuit                    9
appellate                  8
answer                     7
judgement                  7
parties                    7
testimony                  7
witness                    6
plaintiff                  5
complaint                  5
record                     4
bankruptcy                 4
opinion                    4
appeal                     4
discovery                  3
transcript                 3
petit jury                 3
jurisdiction               3
clerk of court             2
misdemeanor                2
procedure                  2
reverse                    2
information                2
sentence                   2
settlement                 2
remand                     2
                          ..
statute of limitations     1
indictment                 1
court reporter             1
sequester     

In [126]:
# Partition the data: 30% is held out for testing, and the fixed
# random_state makes the split repeatable.
from sklearn.model_selection import train_test_split

X, y = df['Sents'], df['Terms']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=12,
)

In [127]:
#vectorize the data, then train and fit a model
import sklearn.decomposition
import sklearn.feature_selection

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
# Import the classifier explicitly. It used to be reached as
# `sklearn.linear_model.LogisticRegressionCV` without `sklearn.linear_model`
# ever being imported, which only works when some other import happens to
# load that submodule as a side effect.
from sklearn.linear_model import LogisticRegressionCV

# Build model: TfidfVectorizer turns the raw text into TF-IDF features and
# LogisticRegressionCV is fit on them; both use their default settings.
model = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classify', LogisticRegressionCV()),
])

# Fit on the training split; as the last expression, the fitted pipeline is
# also displayed as the cell output.
model.fit(X_train, y_train)



Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
 ...2', random_state=None,
           refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0))])

In [128]:
# Run the fitted pipeline over the held-out inputs.
predictions = model.predict(X=X_test)

In [129]:
# Confusion matrix of true labels vs. predicted labels on the test split.
from sklearn import metrics
cm = metrics.confusion_matrix(y_true=y_test, y_pred=predictions)
print(cm)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [130]:
# Fraction of test samples whose predicted label matches the true label.
print(metrics.accuracy_score(y_true=y_test, y_pred=predictions))

0.125


In [131]:
# Per-class precision / recall / f1 / support on the test split.
print(metrics.classification_report(y_true=y_test, y_pred=predictions))

                      precision    recall  f1-score   support

              answer       0.00      0.00      0.00         2
              appeal       0.00      0.00      0.00         1
           appellate       0.00      0.00      0.00         3
         arraignment       0.00      0.00      0.00         1
          bankruptcy       0.00      0.00      0.00         3
               brief       0.00      0.00      0.00         1
     capital offense       0.00      0.00      0.00         1
  charge to the jury       0.00      0.00      0.00         1
         chief judge       0.00      0.00      0.00         1
      clerk of court       0.00      0.00      0.00         1
             counsel       0.00      0.00      0.00         1
               court       0.15      0.85      0.26        13
             default       0.00      0.00      0.00         1
   default judgement       0.00      0.00      0.00         1
           defendant       0.00      0.00      0.00         2
       

  'precision', 'predicted', average, warn_for)


In [132]:
# Persist the fitted pipeline to disk.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23, so prefer the standalone `joblib` package and fall back to the
# vendored copy only on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# joblib.dump returns the list of written file names, shown as cell output.
joblib.dump(model, "Phrase_Extractor.pickle")

['Phrase_Extractor.pickle']

In [134]:
# Try the fitted pipeline on a single long free-text document. The text is
# held in a named variable so the predict call itself stays readable.
sample_document = """ the traces its origins directly back to 1855, when Congress established the United States Court of Claims to provide for the determination of private claims against the United States government. The legislation was signed into law on February 24, 1855, by President Franklin Pierce. Throughout its 160-year history, although it has undergone notable changes in name, size, scope of jurisdiction, and procedures, its purpose has remained the same: in this court the federal government stands as the defendant and may be sued by citizens seeking monetary redress. For this reason, the court has been referred to as the "keeper of the nation's conscience" and "the People's Court."

As originally in 1855, the court lacked the essential judicial power to render final judgments. This oversight was resolved by legislation passed in 1866, in response to President Abraham Lincoln's insistence in his Annual Message to Congress in 1861 that, "It is as much the duty of Government to render prompt justice against itself, in favor of citizens, as it is to administer the same, between private individuals."

In 1887, Congress passed the Tucker Act, which significantly expanded the court's jurisdiction to include all claims against the government except tort, equitable, and admiralty claims. The court thus today has nationwide jurisdiction over most suits for monetary claims against the government and sits, without a jury, to determine issues of law and fact. The general jurisdiction of the court, described in 28 U.S.C. § 1491,[1] is over claims for just compensation for the taking of private property, refund of federal taxes, military and civilian pay and allowances, and damages for breaches of contracts with the government. The court also possesses jurisdiction over claims for patent and copyright infringement against the United States, as well as over certain suits by Indian tribes.

Additionally, the court has jurisdiction to hear both pre-award and post-award bid protest suits by unsuccessful bidders on government contracts.

A unique aspect of the court's jurisdiction throughout its history has been the authority to act on congressional references of legislative proposals for compensation of individual claims. As eventually codified in 28 U.S.C. § 1492,[2] either House of Congress may refer a bill to the Chief Judge of the court for an investigation and a report to Congress. A judge of the court is assigned to act as the hearing officer and preside over the judicial proceedings. Then a three-judge review panel submits a report to Congress for its consideration and disposition of such claims for compensation.

Befitting its unique role, the court has been located throughout its history in Washington, D.C., in the vicinity of the White House or in the U.S. Capitol Building. It first met in May 1855 at Willard's Hotel. In July of that year, it moved into the Capitol. After briefly using the Supreme Court's chamber in the basement of the Capitol, it then acquired its own rooms there. In 1879, the court obtained space on the ground floor of the Freedman's Bank Building, which stood at the place now occupied by the Treasury Annex, adjacent to the southeast corner of Lafayette Park. Two decades later, in 1899, the court moved to the building formerly occupied by William Corcoran's art collection across Lafayette Park at the intersection of 17th Street and Pennsylvania Avenue.

It remained there for 65 years. This building was designed by, and is presently named for, the architect James Renwick, who also designed the Smithsonian Institution's Castle on the National Mall and St. Patrick's Cathedral in New York City. When the facilities there were deemed inadequate by the mid- 1950s, the court asked Congress for a new location. Eventually, the site at 717 Madison Place, NW, was chosen and the court moved to its present home on August 1, 1967.

The court's original composition of three judges was expanded to five in 1863. They would consider evidence proffered by claimants and weigh testimony taken by permanent or special commissioners employed by the court, who were dispersed across the United States. One of the first commissioners was Benjamin Harrison of Indiana, who would later become President.[3] If oral argument was requested, the five judges would hear the case en banc. Appeal to the Supreme Court was by right if the amount in dispute was over $3,000. The growth in government caused by and coinciding with World War I made the system unworkable, as the number of filed cases increased considerably. In 1925, legislation enacted by Congress at the request of the court created a separate trial division of seven commissioners and elevated the five judges to an appellate role. Initially, the trial commissioners would function as special masters in chancery and conduct formal proceedings either at the court's home in Washington, D.C., or elsewhere in the United States in a court facility amenable to the parties. The trial procedures evolved to resemble a non-jury civil trial in district court.

In 1948, the commissioners were authorized to make recommendations for conclusions of law. The number of commissioners was increased in 1953 to 15. In 1966, Congress provided that there would be seven appellate judges to be appointed by the President with life tenure. In 1973, the title of the commissioners was changed to trial judge and by 1977, the Court of Claims had 16 trial judges who conducted trials of cases in the first instance. Judgments, which are required to be paid out of appropriations by Congress, were originally paid by individual appropriations passed separately or as part of other appropriations bills. In 1955, Congress provided for a standing appropriation for judgments of $100,000 or less. Finally, in 1977, Congress created a permanent, indefinite appropriation for all judgments awarded by the court.

The Federal Courts Improvement Act of 1982 created the modern court.[4] While the appellate division of the Court of Claims was combined with the United States Court of Customs and Patent Appeals to comprise the new United States Court of Appeals for the Federal Circuit, the trial division of the Court of Claims became the United States Claims Court (and in 1992, the name was changed to the United States Court of Federal Claims).[5] Appeals from the Court of Federal Claims are taken to the United States Court of Appeals for the Federal Circuit and a judgment there is conclusive unless reviewed by the Supreme Court on writ of certiorari. Decisions of the Court of Claims are binding precedent on both its appellate and trial court successors.

The court, as now constituted, consists of 16 judges, appointed by the President and subject to confirmation by the U.S. Senate for terms of 15 years. In addition, judges who have completed their statutory terms of office are authorized to continue to take cases as senior judges of the court. This ongoing tenure serves as a mechanism to ensure judicial impartiality and independence.

In recent years, the court's docket has been increasingly characterized by complex, high-dollar demands, and high-profile cases in such areas as, for example, the savings and loan crisis of the 1980s, the World War II internment of Japanese-Americans, and the federal repository of civilian spent nuclear fuel.

Nevertheless, despite the nature of the claim, the notability of the claimant, or the amount in dispute, the Court of Federal Claims acts as a clearing house when the government must settle with those it has legally wronged. As observed by former Chief Judge Loren A. Smith, the court is the institutional scale that weighs the government's actions against the standard measure of the law and helps make concrete the spirit of the First Amendment's guarantee of the right "to petition the Government for redress of grievances."[6]

The National Childhood Vaccine Injury Act of 1986 gave the court the authority to create an Office of Special Masters to receive and hear certain vaccine injury cases, and the jurisdiction to review those cases.[7] This vaccine injury jurisdiction has been enlarged in recent years to encompass claims stemming from a number of additional vaccines, including, for example, varicella, hepatitis B, and influenza.

Though a provision of the Administrative Dispute Resolution Act of 1996 gave the Court of Federal Claims and U.S. districts courts concurrent jurisdiction over post-award protests, subsequent legislation provided that, as of January 2001, that the United States Court of Federal Claims would be the exclusive judicial forum for post-award bid protest litigation.

In 2006, the court rendered judgments in more than 900 cases and awarded $1.8 billion in damages. """

model.predict([sample_document])

array(['court'], dtype=object)