In [1]:
from Py_files.mistral_pipeline_v2 import MistralInference
import torch
import pandas as pd

In [2]:
# Sample text from Queens Unofficial Transcript.
# Course rows come first, followed by program-history and term-summary lines.
# The text is kept verbatim, including recognition artifacts (grades such as
# "At"/"Bt"/"Ct" -- presumably misread "+" signs -- and damaged point values
# such as "4." or "16.("), because it is the raw input passed to the
# extraction step.
# NOTE(review): if this is a real student's transcript, confirm it may be
# committed with the notebook.
text = """
APSC 221 Economic And Business Practice 3.00 A 12.0
ELEC 210 Intro Elec Circuits & Machines 4.00 A- 14.8
MINE 244 Underground Mining 3.00 Bt 9.9
MINE 267 App Chem/Instrument Meth Mine 4.00 A- 14.8
MTHE 272 Applications Numerical Methods. 3.50 At 15.0
MTHE 367 Engineering Data Analysis 4.00 B- 10.8
APSC 100A Engineering Practice 0.00 NG 0.0
APSC 111 Mechanics 3.50 A 14.0
APSC 131 Chemistry And Materials 3.50 A 14.0
APSC 151 Earth Systems And Engineering 4.00 At 17.2
APSC 161 Engineering Graphics 3.50 A 14.0
APSC 171 Calculus | 3.50 At 15.0
APSC 200 Engr Design & Practice 4.00 At 17.2
APSC 293 Engineering Communications 1.00 At 4.
CIVL 230 Solid Mechanics | 4.25 At 18.
MECH 230 Applied Thermodynamics | 3.50 At 15.
MINE 201 Intro To Mining/Mineral Proces 4.00 A 16.(
MINE 202 Comp Apps/Instrumntn In Mining 1.50 A 6.(
MTHE 225 Ordinary Differential Equation 3.50 A 14.(
APSC  100B Engineering Practice 11.00 A- 40.7
APSC 112 Electricity And Magnetism 3.50 B+ 11.6
APSC 132 Chemistry And The Environment 3.50 B 10.5
APSC 142 Intro Computer Program Engrs 3.00 A- 11.1
APSC 172 Calculus II 3.50 A- 13.0
APSC 174 Introduction To Linear Algebra 3.50 At 15.0
CLST 201 Roman History 3.00 At 12.¢
ECON 111 Introductory Microeconomics 3.00 A- 11.1
MINE 321 Drilling & Blasting 4.50 A- 16.6
MINE 331 Methods Of Mineral Separation 4.50 A- 16.€
MINE 339 Mine Ventilation 4.50 Ct 10.4
MINE 341 Open Pit Mining 4.50 A- 16.6
Academic Program History
06/12/2012: Bachelor of Science Engineer Active in Program
Major in General Engineering
02/28/2013: Bachelor of Science Engineer Active in Program
Major in Mining Engineering
Option in Mining
12/09/2014: Bachelor of Arts Active in Program
Term GPA 3.51. Term Totals 24.00 24.00 84.3
Term GPA 3.60 Term Totals 21.50 21.50 774
Term GPA 4.18 Term Totals 21.75 21.75 90.8
Term GPA 3.64 Term Totals 28.00 28.00 101.8
Term GPA 4.13 Term Totals 18.00 18.00 74.2
"""
# Sample Headers from Queens Unofficial Transcript.
# One extracted table-header line per row, kept verbatim (including the stray
# "_—" artifacts). Passed to the pipeline alongside the transcript text as a
# formatting hint for the model.
headers = """
Course Description Units Grade _— Points
Course Description Units Grade Points
Course Description Units Grade Points
Course Description Units Grade _— Points
Course Description Units Grade Points
"""

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Initialize Pipeline
mistral_pipeline = MistralInference(device = device)

# First pass: query Mistral with the headers and the full transcript text.
initial_csv_output = mistral_pipeline.query_mistral(headers, text)
print("Initial_CSV_Output:\n", initial_csv_output)
# Convert to df
initial_df = mistral_pipeline.string_to_dataframe(initial_csv_output)

# Filter out identified course lines from the original text
remaining_text = mistral_pipeline.filter_unidentified_courses(initial_csv_output, text)

# Second pass: check the leftover lines for missing courses
filtered_text = mistral_pipeline.check_missing_courses(headers, remaining_text)
print("Checked CSV Output:\n", filtered_text)

# The reply is free-form model text: it may be padded with whitespace, and it
# may mix real CSV rows with a "None" sentinel line (e.g. a course row followed
# by an indented "None"). An exact `== "none"` test on the whole reply misses
# both cases, so the reply is filtered line by line instead: blank lines and
# sentinel lines are dropped, and only what remains is parsed.
checked_rows = [
    row.strip()
    for row in filtered_text.splitlines()
    if row.strip() and row.strip().lower() != "none"
]

# Bound on both paths so later cells that reference `additional_df` do not
# depend on which branch ran (pd.concat silently drops None entries).
additional_df = None
if checked_rows:
    additional_df = mistral_pipeline.string_to_dataframe("\n".join(checked_rows))
    # Append new rows to the initial DataFrame
    final_df = pd.concat([initial_df, additional_df], ignore_index=True)
else:
    # Nothing new was reported; the first-pass frame is the final result.
    final_df = initial_df

# Fix OCR issues with grade pluses
final_df = mistral_pipeline.fix_ocr_pluses(final_df, "Grade")


Creating new instance of MistralInference
Using 3 CPU threads.
Offloading 5 layers to GPU.
-----------------------------------------------------------------------
Loading Mistral to cuda...
-----------------------------------------------------------------------
Prompting Mistral...
Prompt Successful!
Initial_CSV_Output:
 Course Code,Grade,Credits
APSC 221,A,3.0
ELEC 210,A-,4.0
MINE 244,Bt,3.0
MINE 267,A-,4.0
MTHE 272,At,3.5
MTHE 367,B-,4.0
APSC 100A,NG,0.0
APSC 111,A,3.5
APSC 131,A,3.5
APSC 151,At,4.0
APSC 161,A,3.5
APSC 171,At,3.5
APSC 200,At,4.0
CIVL 230,At,4.25
MECH 230,At,3.5
MINE 201,A,4.0
MINE 202,A,1.5
MTHE 225,A,3.5
APSC  100B,A-,11.0
APSC 112,B+,3.5
APSC 132,B,3.5
APSC 142,A-,3.0
APSC 172,A-,3.5
APSC 174,At,3.5
CLST 201,At,3.0
ECON 111,A-,3.0
MINE 321,A-,4.5
MINE 331,A-,4.5
MINE 339,Ct,4.5
MINE 341,A-,4.5
Asking Mistral to Check it's Work...
Check Complete.
Checked CSV Output:
 APSC 293,4.0,1.00
         None


In [4]:
# Side-by-side (column-wise) concatenation of the two frames. With
# ignore_index=True on axis=1 the column labels are replaced by 0..n-1.
# NOTE(review): the main pipeline cell appends rows (default axis=0); confirm
# that column-wise joining is really what is wanted here.
final_df2 = pd.concat(
    [initial_df, additional_df],
    axis=1,
    ignore_index=True,
)
final_df2

Unnamed: 0,0,1,2,3,4,5
0,APSC 221,A,3.0,,,
1,ELEC 210,A-,4.0,,,
2,MINE 244,Bt,3.0,,,
3,MINE 267,A-,4.0,,,
4,MTHE 272,At,3.5,,,
5,MTHE 367,B-,4.0,,,
6,APSC 100A,NG,0.0,,,
7,APSC 111,A,3.5,,,
8,APSC 131,A,3.5,,,
9,APSC 151,At,4.0,,,


In [5]:
# Hand-written leftover text used to exercise the missing-course check on its
# own: the one course row absent from the first-pass CSV (APSC 293) plus the
# non-course program-history and term-summary lines.
# NOTE: this overwrites the `remaining_text` computed by the pipeline cell.
remaining_text = """
APSC 293 Engineering Communications 1.00 At 4.
Academic Program History
06/12/2012: Bachelor of Science Engineer Active in Program
Major in General Engineering
02/28/2013: Bachelor of Science Engineer Active in Program
Major in Mining Engineering
Option in Mining
12/09/2014: Bachelor of Arts Active in Program
Term GPA 3.51. Term Totals 24.00 24.00 84.3
Term GPA 3.60 Term Totals 21.50 21.50 774
Term GPA 4.18 Term Totals 21.75 21.75 90.8
Term GPA 3.64 Term Totals 28.00 28.00 101.8
Term GPA 4.13 Term Totals 18.00 18.00 74.2
"""

In [9]:
# Echo the exact text that is about to be sent to the check, for inspection.
print(remaining_text)
# Check for missing courses
# Re-runs only the second pass; requires `mistral_pipeline` and `headers`
# from the earlier cells and rebinds `filtered_text`.
# NOTE(review): the execution count jumps from In [5] to In [9], so this cell
# was re-run several times; confirm the notebook still works top to bottom on
# a fresh kernel.
filtered_text = mistral_pipeline.check_missing_courses(headers, remaining_text)
print(filtered_text)


APSC 293 Engineering Communications 1.00 At 4.
Academic Program History
06/12/2012: Bachelor of Science Engineer Active in Program
Major in General Engineering
02/28/2013: Bachelor of Science Engineer Active in Program
Major in Mining Engineering
Option in Mining
12/09/2014: Bachelor of Arts Active in Program
Term GPA 3.51. Term Totals 24.00 24.00 84.3
Term GPA 3.60 Term Totals 21.50 21.50 774
Term GPA 4.18 Term Totals 21.75 21.75 90.8
Term GPA 3.64 Term Totals 28.00 28.00 101.8
Term GPA 4.13 Term Totals 18.00 18.00 74.2

Below is OCR text from a student transcript. This text maight contain student grade data. Determine if there is course information and corresponding grades from this data. If there is, select lines only relevant to student courses and grades and format the fields into a table in csv format. Some extracted table headers are given below to help with formatting. The csv you output should only have 3 columns: 'Course Code', 'Grade', and 'Credits', you must select which c