In [2]:
import pandas as pd
import re
import unicodedata

# File locations for the cleaning stage: the raw export that is read here and
# the cleaned copy that this cell writes.
INPUT_CSV = "9700cases.csv"
OUTPUT_CSV = "9700cases_clean.csv"

# Read every field as plain text with NA detection switched off.
# With the defaults, pandas converts fields such as "NA", "N/A", "None" or
# "null" to NaN, and clean_text turns NaN into "", silently erasing cells whose
# real content is one of those words. dtype=str also stops numeric-looking text
# from being re-formatted (e.g. "1.50" -> 1.5) before it is cleaned.
df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False)

# Typographic punctuation that NFKD does NOT fold to ASCII. Without this map
# the allow-list below would turn "petitioner’s" into "petitioner s".
_PUNCT_TO_ASCII = str.maketrans({
    "\u2018": "'", "\u2019": "'", "\u201a": "'", "\u201b": "'",   # single quotes
    "\u201c": '"', "\u201d": '"', "\u201e": '"', "\u201f": '"',   # double quotes
    "\u2010": "-", "\u2012": "-", "\u2013": "-", "\u2014": "-",   # hyphen / dashes
    "\u2015": "-", "\u2212": "-",                                 # bar / minus sign
})

# Everything outside this allow-list is replaced by a space.
# Kept: letters, digits, space, comma, dot, dash, colon, semicolon, slash,
# parentheses, apostrophe, double quote, ? and !
_UNSAFE_CHARS = re.compile(r"[^A-Za-z0-9 ,.\-:;/()'\"?!]")
_WHITESPACE_RUN = re.compile(r"\s+")


def clean_text(val):
    """Reduce one cell value to a single line of safe ASCII text.

    Parameters
    ----------
    val : object
        Any scalar cell value. Missing values (``None``, ``NaN``, ``pd.NA``)
        are accepted; everything else is converted with ``str()``.

    Returns
    -------
    str
        The cleaned text: accents folded to their base letters, curly quotes
        and dashes mapped to ASCII, every other character outside the
        allow-list (including newlines and tabs) replaced by a space, runs of
        whitespace collapsed, and leading/trailing whitespace removed.
        Missing values yield ``""``.
    """
    if pd.isna(val):
        return ""

    # NFKD splits accented letters into base letter + combining mark and folds
    # compatibility characters (ligatures, non-breaking space, ellipsis, ...).
    text = unicodedata.normalize("NFKD", str(val))

    # Drop the combining marks themselves. If they were left in, the allow-list
    # would replace each one with a space and split the word ("naïve" -> "nai ve").
    text = "".join(ch for ch in text if not unicodedata.combining(ch))

    # Map curly quotes / dashes to the ASCII forms the allow-list keeps.
    text = text.translate(_PUNCT_TO_ASCII)

    # Newlines, carriage returns and tabs are outside the allow-list, so they
    # become spaces here together with every other disallowed character.
    text = _UNSAFE_CHARS.sub(" ", text)

    return _WHITESPACE_RUN.sub(" ", text).strip()

# Run clean_text over every cell, column by column, keeping the original
# column order and row index.
# NOTE(review): this includes the Link column, so URL characters outside the
# clean_text allow-list (e.g. "=", "&", "_", "%") become spaces — confirm that
# is intended.
df = pd.DataFrame(
    {name: df[name].map(clean_text) for name in df.columns},
    index=df.index,
)

# Persist the cleaned table without the pandas row index.
df.to_csv(OUTPUT_CSV, index=False)
print("CLEAN CSV SAVED →", OUTPUT_CSV)

# Preview the first rows as the cell's rich output.
df.head()

CLEAN CSV SAVED → 9700cases_clean.csv


Unnamed: 0,Case Title,Link,Issue,Precedent Analysis,Analysis of the law,Fact,Respondent's Argument,Petitioner's Argument,Court's Reasoning,Conclusion
0,The State Of Tamil Nadu vs The Governor Of Tam...,https://indiankanoon.org/docfragment/82729634/...,"J.B. PARDIWALA, J. 8, CONSTITUENT ASSEMB. DEB....","For the convenience of the exposition, this ju...","STATE LIST Entry 32 -- Incorporation, regulati...",3. The Legislature for the State of Tamil Nadu...,"22. The petitioner, on the other hand, has ave...","31. Mr. Rakesh Dwivedi, the learned Senior Cou...",4. This Court issued notice to the respondents...,241. What emerges from the above is that the f...
1,Directorate Of Revenue Intelligence vs Raj Kum...,https://indiankanoon.org/docfragment/92625542/...,"J.B. PARDIWALA, J.: Criminal Appeal No. 1319 o...",16. Aggrieved by the aforesaid order dated 30....,"Thus it being Schedule H drug, it would fall w...",The DRI(HQ) officers recovered 25 corrugated c...,25. It was submitted that the respondent no. 3...,9. The Chemical Examiner of the Central Revenu...,13. The appellant had preferred a petition for...,In these circumstances it would be appropriate...
2,Independent Sugar Corporation Limited vs Giris...,https://indiankanoon.org/docfragment/117249167...,"Hrishikesh Roy, J. FACTUAL MATRIX 1. These are...","2. One key party in this matter is HNGIL i.e.,...",20.1. It was argued that the RP adhered to the...,3. Combining with HNGIL is AGI Greenpac Ltd. h...,"9. On 22.10.2022, CCI declared the application...","18. Dr. Abhishek Manu Singhvi, learned senior ...","42. Where the language is clear, plain and una...",126. The CCI was obligated to issue an appropr...
3,Pinki vs The State Of Uttar Pradesh on 15 Apri...,https://indiankanoon.org/docfragment/173333132...,i. FIR No. 193/2023 (in Sanjay v. State of Utt...,"J. B. PARDIWALA, J. For the convenience of exp...","against Transnational Organised Crime, traffic...",a ) The present case pertains to an interstate...,16. The level of awareness about schemes and p...,6. Most of the Government official stated that...,mechanism for children who are unable to live ...,8. The concerned accused despite service have ...
4,Piramal Capital And Housing Finance ... vs 63 ...,https://indiankanoon.org/docfragment/190999006...,(I) THE DETAILS AND CATEGORIES OF THE APPEALS:...,"3. In these long-drawn proceedings, the Factua...",iii. Section 36 (A) of the NHB Act and Section...,"1. In the captioned Appeals, the contextual fa...",promoters challenging the impugned order dated...,"Hence, the present set of Appeals have been fi...","ix. However, thereafter considering the comple...",45. Keeping in view the above settled legal po...


In [3]:
import pandas as pd
import toons

# File locations for the conversion stage: the cleaned CSV written by the
# cleaning cell and the TOON file produced here.
INPUT_CSV = "9700cases_clean.csv"
OUTPUT_TOON = "9700cases.toon"

# keep_default_na=False: the cleaning cell wrote missing values as empty
# strings, but pandas reads empty fields (and words like "NA" / "None") back
# as float NaN by default. Turning NA detection off keeps every text cell a
# string so the records handed to the serializer contain "" instead of NaN.
df = pd.read_csv(INPUT_CSV, keep_default_na=False)

# One dict per row, keyed by column name.
records = df.to_dict(orient="records")

# Serialize the records to TOON text and write it out as UTF-8.
toon_text = toons.dumps(records)

with open(OUTPUT_TOON, "w", encoding="utf-8") as f:
    f.write(toon_text)

print("TOON SAVED →", OUTPUT_TOON)

TOON SAVED → 9700cases.toon


In [4]:
import toons

# Round-trip check: read the TOON file back from disk and parse it to confirm
# that it loads, then report how many records came out.
with open("9700cases.toon", encoding="utf-8") as toon_file:  # default mode is "r"
    text = toon_file.read()

records = toons.loads(text)
row_count = len(records)
print("Parsed:", row_count, "rows")

Parsed: 9760 rows
