In [1]:
from PyPDF2 import PdfFileReader
import re
import os
import string
import pandas as pd
import numpy as np
import docx
import datetime
from docx.shared import Inches
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

In [2]:
# step # 1
def step_1_find_files_extension(extension,path):
    os.getcwd()
    all_files = os.listdir(path)
    files = [file for file in all_files if file.lower().endswith((extension))]
    return files

In [3]:
# step 2
def step_2_read_pdf_files(files,path):
    # a list that contain all the str(file texts)
    list_of_files = []
    # opening all pdf files in the current directory
    for pdf_file in files:
        file = open(path+pdf_file,'rb')
        # reading those files
        readed_file = PdfFileReader(file)
        # checking if file is encrypted
        if not readed_file.isEncrypted:
            # a str to save all text of pdf file 
            file_1 = ' '
            # counting total pages in the pdf file
            total_pages = readed_file.getNumPages()
#           print(f"Total no of Pages = {total_pages} in {pdf_file}")
            # extracting text from every page one by one 
            for page in range(total_pages):
                # getting page number 
                page_obj = readed_file.getPage(page)
                # extracting page and save them in a str
                file_1 += page_obj.extractText()
               # print(file_1)
            # saveing the complete text of file into a list
            list_of_files.append(file_1)
            file.close()
        else:
            print('File is Locked')
    return list_of_files
def step_2_read_word_files(files,path):
    list_of_files_2 = []
    for file in files:
        file_text = ''
        doc = docx.Document(path+file)
        for i in doc.paragraphs:
            file_text += i.text
        list_of_files_2.append(file_text)
    return list_of_files_2    

In [4]:
# Data Cleansing Includes

# 1: remove Punctuations 
# 2: Tokanization
# 3: remove stop words
# 6: remove two-length words
# 4: stemming
# 5: lemmatization
# using re and nltk


def text_cleaner(string):
    #remove \n
    clean0 = string.replace('\n','')
    # remove punc
    clean1 = re.sub('[^a-zA-Z]',' ',clean0)
    # tokenization
    words = word_tokenize(clean1)
    # removing stop words
    stop_words = set(stopwords.words('english'))
    clean4 = [w for w in words if not w in stop_words]
    # two-length words
    clean5 = [w for w in clean4 if len(w) > 2]
    # normalization
    clean6 = [word.lower() for word in clean5]
    # stemming
#     stemmer = PorterStemmer()
#     clean7 = [stemmer.stem(word) for word in clean6]
    # lemmatization
    lemmer = WordNetLemmatizer()
    clean8_list = [lemmer.lemmatize(w) for w in clean6]
    return clean8_list
    

In [5]:
# cleaning all the files in the list of files
def step_3_clean_all_files(list_of_files):
    clean_list_of_files = [text_cleaner(file_) for file_ in list_of_files]
    return clean_list_of_files

In [6]:
def step_4_name_files_with_student_names(files_names,cleaned_files):
    cleaned_pdf_files_with_student_names = {}
    for i in range(len(files_names)):
        cleaned_pdf_files_with_student_names[files_names[i]] = cleaned_files[i]
    return cleaned_pdf_files_with_student_names    

In [7]:
def find_matched_words_and_synonyms(student1,cleaned_text_1,student2,cleaned_text_2,plague_threshould = 0.2):    
    matched_words = {}
    matched_synonyms = {}
    cleaned_text_1_unique = list(set(cleaned_text_1))
    cleaned_text_2_unique = list(set(cleaned_text_2))
#     if len(cleaned_text_1_unique) <= len(cleaned_text_2_unique):
    for word1 in cleaned_text_1_unique:
        for word2 in cleaned_text_2_unique:
            if word1 == word2:
                matched_words[word1] = word2 
            else:
                for syn in wordnet.synsets(word2): 
                    if syn.lemmas():
                        for l in syn.lemmas(): 
                            if word1 == l.name() and word2 != l.name():
                                matched_synonyms[word1] = word2

        
        wplagued = len(matched_words.keys()) / len(cleaned_text_1_unique) 
        splagued = len(matched_synonyms.keys()) / len(cleaned_text_1_unique)
#         tplagued = wplagued * splagued 
     
#     if len(cleaned_text_1_unique) > len(cleaned_text_2_unique):
#         for word1 in cleaned_text_2_unique:
#             for word2 in cleaned_text_1_unique:
#                 if word1 == word2:
#                     matched_words[word1] = word2 
#                 else:
#                     for syn in wordnet.synsets(word2): 
#                         if syn.lemmas():
#                             for l in syn.lemmas(): 
#                                 if word1 == l.name() and word2 != l.name():
#                                     matched_synonyms[word1] = word2
                                    
#         wplagued = len(matched_words.keys()) / len(cleaned_text_2_unique) 
#         splagued = len(matched_synonyms.keys()) / len(cleaned_text_2_unique)
# #         tplagued = wplagued * splagued
        
    if wplagued >= plague_threshould:
        return {'Student_1':student1,'Student_2':student2,
                'Word Matched Plagued(%)': wplagued * 100 ,'Synonyms Matched Plagued(%)': splagued * 100 ,
                'Matched_words': matched_words,'Matched_Synonyms':matched_synonyms} 

In [8]:
def step_5_find_plagues(cleaned_file_with_student_name, plague_threshould = 0.2):
    result = []
    prev_stud_1 = None
    prev_stud_2 = None
    for student1,compare_from_file in cleaned_file_with_student_name.items():
        for student2,compare_to_file in cleaned_file_with_student_name.items():
            if student1 != student2 and prev_stud_1 != student2 and prev_stud_2 != student1:
                prev_stud_1,prev_stud_2 = student1,student2
                res = find_matched_words_and_synonyms(student1,compare_from_file,student2,compare_to_file,
                                                             plague_threshould=plague_threshould)
                if res:
                    result.append(res)
    if not result:
        print(f"No Student did {plague_threshould * 100} % Plagued")
        return result
    else:
        return result            

In [9]:
def step_6_make_plagued_detailed_report_of_all_students(result_pdf,result_docx):
    path1 = "Plagiarism_Reports"
    if not os.path.isdir("Plagiarism_Reports"):
        os.mkdir(path1)
    if result_pdf and not result_docx:
        result_docx = dict()
    elif result_docx and not result_pdf:
        result_pdf = dict()
    elif not result_pdf and not result_docx:
        result_docx = dict()
        result_pdf = dict()
        
    df_pdf = pd.DataFrame(result_pdf)
    df_doc = pd.DataFrame(result_docx)
    df_complete = pd.concat([df_pdf,df_doc])
    df_complete.reset_index(inplace=True)
    df_complete.drop('index',axis=1,inplace=True)
    df_complete.to_csv("Plagiarism_Reports/Detailed_Plagued_Report.csv")
    print("Success! Detailed Report has been created in  Plagiarism_Reports  directry")
    return df_complete

In [29]:
def create_student_file(**kwargs):
    
    document = docx.Document()
    now = datetime.datetime.now()
    time = now.strftime("%Y-%m-%d %I:%M-%p")
    document.add_heading(f'Plagued Report',level = 0)
    document.add_heading(f"Processed on :",level=1)
    document.add_paragraph(f"{time}")

    document.add_heading(f"Student Name (who plagued) :",level=1)
    document.add_paragraph(f"{kwargs['Student_1']}")

    document.add_heading(f"Student Name (whom plagued) :",level=1)
    document.add_paragraph(f"{kwargs['Student_2']}")                         

    document.add_heading(f"Word Matched Plagued in percentage :",level=1)
    document.add_paragraph(f"{kwargs['Word Matched Plagued(%)']}")

    document.add_heading(f"Synonyms Matched Plagued in percentage :",level=1)
    document.add_paragraph(f"{kwargs['Synonyms Matched Plagued(%)']}")

    document.add_heading('Matched words:', level=1)
    document.add_paragraph(f"{kwargs['Matched_words']}") # , style='Intense Quote')

    document.add_heading('Total Matched words:', level=1)
    document.add_paragraph(f"{len(kwargs['Matched_words'])}") # , style='Intense Quote')

    document.add_heading('Matched Synonyms:', level=1)
    document.add_paragraph(f"{kwargs['Matched_Synonyms']}") # , style='Intense Quote')

    document.add_heading('Total Matched Synonyms:', level=1)
    document.add_paragraph(f"{len(kwargs['Matched_Synonyms'])}") # , style='Intense Quote')
    document.save(f'Plagiarism_Files/{kwargs["Student_1"]}.docx')

# document.add_paragraph(
#     'first item in unordered list', style='List Bullet'
# )
# document.add_paragraph(
#     'first item in ordered list', style='List Number'
# )

# records = (
#     (3, '101', 'Spam'),
#     (7, '422', 'Eggs'),
#     (4, '631', 'Spam, spam, eggs, and spam')
# )

# table = document.add_table(rows=1, cols=3)
# hdr_cells = table.rows[0].cells
# hdr_cells[0].text = 'Qty'
# hdr_cells[1].text = 'Id'
# hdr_cells[2].text = 'Desc'
# for qty, id, desc in records:
#     row_cells = table.add_row().cells
#     row_cells[0].text = str(qty)
#     row_cells[1].text = id
#     row_cells[2].text = desc



In [25]:
def step_7_make_plagued_brief_report_of_all_students(detailed_report):
    if not os.path.isdir("Plagiarism_Reports"):
        os.mkdir(path1)
    highest_df = dict(detailed_report.groupby(['Student_1'])['Word Matched Plagued(%)'].max())
    name,plagued = list(highest_df.keys()),list(highest_df.values())
    data = pd.DataFrame([])
    for nam,key in zip(name,plagued):
        df = pd.DataFrame(data=detailed_report[(detailed_report['Student_1'] == nam) & (detailed_report['Word Matched Plagued(%)'] == key)])
        data = data.append(df , ignore_index=True)
    data.to_csv("Plagiarism_Reports/brief_Plagued_Report.csv")
    print("Success! Brief Plagued Report has been created in your current directry")    
    return data   

In [12]:
# students,datasets = [],[]
#     students.append('Student_1')
#     students.append('Student_2')
#     path1 = "Plagiarism_Reports"
#     if not os.path.isdir("Plagiarism_Reports"):
#         os.mkdir(path1)
#     for student in students:
#         highest_df = dict(detailed_report.groupby([student])['Word Matched Plagued'].max())
#         name,plagued = list(highest_df.keys()),list(highest_df.values())
#         data = pd.DataFrame([])
#         for nam,key in zip(name,plagued):
#             df = pd.DataFrame(data=detailed_report[(detailed_report[student] == nam) & (detailed_report['Word Matched Plagued'] == key)])
#             data = data.append(df , ignore_index=True)
#         data.to_csv(f"Plagiarism_Reports/brief_Plagued_Report_of_{student}.csv")
#         datasets.append(data)
#     return datasets 


In [13]:
def step_8_make_plagued_file_of_individual_students(brief_report):
    count = 0
    path_ = "Plagiarism_Files"
    if not os.path.isdir("Plagiarism_Files"):
        os.mkdir(path_)       
    for i in range(len(brief_report)):
        one_row = dict(brief_report.iloc[i,:])
        create_student_file(**one_row)
        count += 1 
    return f"{count} files has been created"

In [32]:
# find files in current directory
files_pdf = step_1_find_files_extension('.pdf',path = "students_files/")
# # reading files
readed_pdf_files = step_2_read_pdf_files(files_pdf, path ="students_files/")
#  cleaning files
cleaned_pdf_files = step_3_clean_all_files(readed_pdf_files)

# name files with student names
# pdf files with student names
cleaned_files_with_names = step_4_name_files_with_student_names(files_names=files_pdf,cleaned_files=cleaned_pdf_files)

# find plages of these files
result_pdf =  step_5_find_plagues(cleaned_files_with_names,plague_threshould=0.0)

No Student did 0.0 % Plagued


In [33]:
# find files in current directory
files_docx = step_1_find_files_extension(('docx'),path="students_files/")

In [34]:
#  reading files
readed_docx_files = step_2_read_word_files(files_docx,path = "students_files/")

In [35]:
#  cleaning files 
cleaned_docx_files = step_3_clean_all_files(readed_docx_files)

In [36]:
# name files with student names
# docx files with student names
cleaned_files_with_names = step_4_name_files_with_student_names(files_names=files_docx,cleaned_files=cleaned_docx_files)

In [20]:
# find plages of these files
# result_docx =  step_5_find_plagues(cleaned_files_with_names,plague_threshould=0.7)

In [37]:
result_docx =  step_5_find_plagues(cleaned_files_with_names,plague_threshould=0.75)

In [39]:
detailed_report = step_6_make_plagued_detailed_report_of_all_students(result_pdf,result_docx)

Success! Detailed Report has been created in  Plagiarism_Reports  directry


In [40]:
brief_report = step_7_make_plagued_brief_report_of_all_students(detailed_report=detailed_report)

Success! Brief Plagued Report has been created in your current directry


In [41]:
student_files = step_8_make_plagued_file_of_individual_students(brief_report=brief_report)

In [42]:
student_files

'21 files has been created'

In [None]:
files

In [None]:
highest_df = dict(detailed_report.groupby(['Student_2'])['Word Matched Plagued'].max())

In [None]:
name,plagued = list(highest_df.keys()),list(highest_df.values())
data_2 = pd.DataFrame([])
for nam,key in zip(name,plagued):
    df = pd.DataFrame(data=detailed_report[(detailed_report['Student_2'] == nam) & (detailed_report['Word Matched Plagued'] == key)])
    data_2 = data_2.append(df , ignore_index=True)
# data.to_csv("brief_Plagued_Report_of_Who_Plagued.csv")
# print("Success! Brief Plagued Report has been created in your current directry")  

In [None]:
df = pd.read_csv('Plagiarism_Reports/Detailed_Plagued_Report.csv')

In [None]:
res = df.groupby(['Student_1'])['Word Matched Plagued'].max()

In [None]:

highest_df = dict(detailed_report.groupby(['Student_1'])['Word Matched Plagued'].max())
name,plagued = list(highest_df.keys()),list(highest_df.values())
data = pd.DataFrame([])
for nam,key in zip(name,plagued):
    df = pd.DataFrame(data=detailed_report[(detailed_report['Student_1'] == nam) & (detailed_report['Word Matched Plagued'] == key)])
    data = data.append(df , ignore_index=True)
data.to_csv("Plagiarism_Reports/brief_Plagued_Report.csv")
print("Success! Brief Plagued Report has been created in your current directry")    
return data   

In [None]:
# for result in result_pdf:
#     for name,value in result.items():
#         print(f"{name} -> {value}")
#     print("\n") 

In [None]:
for result in result_docx:
    for name,value in result.items():
        print(f"{name} -> {value}")
    print("\n") 

In [None]:
step_4_prepare_dataset(result_pdf=result_pdf,result_docx=result_docx)

In [None]:
str1 = '''Computer Networks
Assignment 1:
1. What is a network protocol? Explain why are standards important for protocols?

Network Protocols:
Network protocols are formal standards and rules, procedures and formats that define communication between two or more devices over a network.
Standards are important for protocols as they are rules which are used by different devices to communicate effectively without said rules devices would not be able to communicate with each other.

2. Differentiate between unicast, multicast and broadcast in computer networks. Give an example of a situation in which multicast addresses might be beneficial.
Unicast:
It refers to such a transmission on a network where there is only one sender and receiver. It is a one-to-one transmission from one end of the network to the other.
Multicast:
In multicast there are one or more senders and one or more receivers on a network. In this network traffic follows either one to all or all to one transmission.
Broadcast:
Broadcasting is a one to all transmission method there are different types of broadcasts.
Limited Broadcasting: 
	Limited broadcast is the broadcast limited to a single LAN and which is to be received by all.
Direct Broadcasting:
	This is useful when a device in one network wants to transfer packet stream to all the devices over the other network.

3. For each of the following four networks, discuss the consequences if a connection fails.
a. Five devices arranged in a mesh topology
Only one computer will go offline from network
b. Five devices arranged in a star topology (not counting the hub)
Only one computer goes offline from network
c. Five devices arranged in a bus topology
if one computer fails it will not affect the network the failed computer wont be able to send any packets.
D. Five devices arranged in a ring topology 
If one computer fails in a ring topology then all computers stop receiving data as it has to go through the failed computer.

4. What is Hub, switch and Router? Differentiate between their usages in a network. Explain your answer with an example.
Hub:
A device which is used to connect different computers on a network. It is mostly used in local area networks where there is small number of computers. The hub does not have any software on it. It is only hardware.
Switch:	
A switch is a device which connects different computers but it has software on board. A switch used a switching table which has mac addresses of the computers connected to it which it uses to identify each computer. Also used in local area networks such as labs.
Router:
A router is device which is used to connect multiple networks it is the more advanced of the last two devices as it is used everywhere. A router has a routing table which has ip addresses of the computers on the network which is used to identify each computer instead of mac address uniquely. Routers are widely used and currently the internet uses routers to connect different networks.

5. Compare the OSI and TCP/IP model. Give at least one example of each.
OSI Model:
OSI stands for open systems interconnection model.
It is more of a theoretical reference model that is used to guide developers so the software programs they create can inter operate, and to facilitate a clear framework that describes the functions of a networking system. It has two additional layers than tcp/ip model.
OSI model has seven layers that are:
•	Application
•	Presentation
•	Session
•	Transmission
•	Network
•	Data-link
•	Physical

TCP/IP Model:
	TCP/IP model stands for transfer control protocol/Internet protocol.	
Unlike OSI model this model only has 5 layers excluding the two additional layers in OSI.
TCP/IP is widely used instead of OSI. 
TCP/IP has five layers which are:
•	Application Layer
•	Transport Layer
•	Network Layer
•	Data-Link Layer
•	Physical Layer
6. What are end systems in a network? Why are they called hosts? List several different types of end systems?
	End systems are the systems that are on the edge of a network.
	End systems are also called hosts because they host networking applications.
	Different types of end systems include Laptops, Computers and Mobiles etc.

7. Differentiate between DSL and Cable Networks. Suppose both the networks use same quality of cable, then which network provides a better internet connection and why?
	DSL:
		DSL stands for digital subscriber line. DSL uses existing phone network to provide internet. DSL provides internet as-well as voice data. Data over DSL goes to the internet and voice goes to the telephone net.
	Cable Network:
		Cable network provides internet via television cable networks. It uses coaxial cables to deliver internet and television data. Cable networks unlike DSL don’t have direct access to the central office.
Its primary disadvantage is that you're sharing bandwidth with neighbors who are using the same cable line who can then see your traffic.
If both use same type of cables then DSL would be better if it uses coaxial cable.
8. What is an ISP? How can a subscriber of an ISB connect to its ISP connection?
Explain with an example.
	An ISP is an internet service provider which provides internet to its customers.  An Internet service provider (ISP) is a company that provides customers with Internet access. Data may be transmitted using several technologies, including dial-up, DSL, cable modem, wireless or dedicated high-speed interconnects.

For example a subscriber receives a router/access point to connect to the internet from their home which either uses a telephone line or a fiber line.

9. Differentiate between multipoint and point to point connection. What are the advantages of a multipoint connection over a point-to-point connection?  	
When there is a single dedicated link only between two devices, it is a point-to-point connection whereas, if a single link is shared by more than two devices then it is said to be a multipoint connection. In multipoint connection the channel capacity is shared temporarily by the devices in connection.
Advantages of multipoint over point to point:
The advantages of a multipoint connection over a point-to-point connection are 
•	Ease of installation
•	Low cost
•	Reliability




10. What is the difference between half-duplex and full-duplex transmission modes?
Give examples of practical applications of both modes.
 	

Half-Duplex:
		Half-duplex data transmission of data means that data is transmitted in just one direction at a time. For example, only one person can send data at a time and the other can receive both can’t send data at the same time.
Full-Duplex:
	Full-duplex data transmission means that data can be transmitted in both directions on a signal carrier at the same time. For example, on a local area network with full-duplex transmission, one computer can be sending data on the line while another computer is receiving data.



11. Compare FDM and TDM techniques in circuit switching with their advantages and dis-advantages. Which technique should be preferred to serve a large number of users and why?
	FDM:
		FDM stands for frequency division multiplexing. In this method a single wire is divided into multiple frequencies to send different data.
Advantages:
	FDM proves much better latency compared to TDM.
Disadvantages:
	Less flexible than TDM allocated frequency cannot be dynamically changes.
TDM:
		TDM stands for time division multiplexing TDM divides and allocates certain time periods to each channel for sending and receiving data
Advantages:
TDM has greater flexibility and efficiency, by dynamically allocating more time periods to the signals that need more of the bandwidth, while reducing the time periods to those signals that do not need it.
Disadvantages:
	Only one channel can transmit at a given time and it has more latency

FDM performs better for a larger number of users than TDM since each signal uses only a small number of bandwidth at a time so it can accommodate larger users.





'''

str2 = '''Operating systems can be viewed from two viewpoints resource managers and extended machines. In the resource manager view, the operating system's job is to manage the different parts of the system efficiently. 
In the extended machine view, the job of the system is to provide the users with abstractions that include processes, address spaces, and files. 
OS (Operating System) is a System Software, which is used manage computer Resources in Efficient, Reliable and Secure methods. It is the interaction between User and Computer '''

In [None]:
str1

In [None]:
str2

In [None]:
cleaned_text_1 = text_cleaner(str1)
cleaned_text_2 = text_cleaner(str2)

In [None]:
cleaned_text_1_unique = list(set(cleaned_text_1))
cleaned_text_2_unique = list(set(cleaned_text_2))

In [None]:
matched_words = {}
matched_synonyms = {}
if len(cleaned_text_1_unique) <= len(cleaned_text_2_unique):
    for word1 in cleaned_text_1_unique:
        for word2 in cleaned_text_2_unique:
            if word1 == word2:
                matched_words[word1] = word2 
            else:
                for syn in wordnet.synsets(word2): 
                    if syn.lemmas():
                        for l in syn.lemmas(): 
                            if word1 == l.name() and word2 != l.name():
                                matched_synonyms[word1] = l.name()
    prob = len(matched_words.keys()) / len(cleaned_text_1_unique)

In [None]:
prob

In [None]:
cleaned_text_1

In [None]:
# print(count)
matched_words
matched_synonyms
len(list(matched_words.keys()))

In [None]:
len(matched_words.keys()) / len(cleaned_text_1)

In [None]:
31/48

In [None]:
synonyms_f1 = {} 
synonyms_f2 = {} 

for word in cleaned_text_1:
    for syn in wordnet.synsets(word):
        if syn.lemmas():
            synonyms_f1[word] = syn.lemmas()[0].name() 
        else:
            synonyms_f1[word] = word

for word in cleaned_text_2:
    for syn in wordnet.synsets(word):
        if syn.lemmas():
            synonyms_f2[word] = syn.lemmas()[0].name() 
        else:
            synonyms_f2[word] = word

            
# print(set(synonyms)) 
print(synonyms_f1)
# len(synonyms)
print(synonyms_f2)

In [None]:
sorted_synoyms_1 = dict(sorted(synonyms_f1.items()))

In [None]:
sorted_synoyms_2 = dict(sorted(synonyms_f2.items()))

In [None]:
list(sorted_synoyms_1.values())

In [None]:
# # word to word match 
# seq = SequenceMatcher(None,a=list(sorted_synoyms_1.keys()),b=list(sorted_synoyms_2.keys()))
# print(seq.ratio())

# word to synonyms
seq2 = SequenceMatcher(None,a=list(cleaned_pdf_files_with_syn[1].keys()),b=list(cleaned_pdf_files_with_syn[2].values()))
print(seq2.ratio())

In [None]:
seq2 = SequenceMatcher(None,a=f1,b=f2)
print(seq2.ratio())

In [None]:
seq.real_quick_ratio() # upper bound of plegued

In [None]:
list(seq.get_matching_blocks())

In [None]:
# for block in s.get_matching_blocks():
#  |  ...     print("a[%d] and b[%d] match for %d elements" % block)
#  |  a[0] and b[0] match for 8 elements
#  |  a[8] and b[17] match for 21 elements
#  |  a[29] and b[38] match for 0 elements

# find_longest_match(alo, ahi, blo, bhi)
#  |      Find longest matching block in a[alo:ahi] and b[blo:bhi]

# real_quick_ratio()
#  |      Return an upper bound on ratio() very quickly

In [None]:
f1[0:0+25] == f2[0:0+25]
print(f1[0:0+25])
print(f2[0:0+25])

In [None]:
f1[25:25+22] == f2[26:26+22]
print(f1[25:25+22])
print(f2[26:26+22])

In [None]:
f1[48:48+0] == f2[49:49+0]
print(f1[48:48+0])
print(f2[49:49+0])

(i, j, n)
a[i:i+n] == b[j:j+n]

In [None]:
# ratio(self)
#  |      Return a measure of the sequences' similarity (float in [0,1]).
#  |      
#  |      Where T is the total number of elements in both sequences, and
#  |      M is the number of matches, this is 2.0*M / T.
#  |      Note that this is 1 if the sequences are identical, and 0 if
#  |      they have nothing in common.
# match = fuzz.SequenceMatcher(None,a=list(c),b=list(c2))

In [None]:
import nltk 
from nltk.corpus import wordnet 
synonyms = [] 
antonyms = [] 

for syn in wordnet.synsets("good"): 
	for l in syn.lemmas(): 
		synonyms.append(l.name()) 
		if l.antonyms(): 
			antonyms.append(l.antonyms()[0].name()) 

print(set(synonyms)) 
print(set(antonyms)) 


In [None]:
syn = wordnet.synsets("")

In [None]:
student_name_1 = "Khizar Sultan"
student_name_2 = "Ali Shahbaz"

word_plagued = "100%"
syn_plagued = "26.0%"

list_l = list("abcdefghhi")

In [None]:
document = docx.Document()
now = datetime.datetime.now()
time = now.strftime("%Y-%m-%d %I:%M-%p")
document.add_heading(f'Plagued Report',level = 0)
document.add_heading(f"Processed on : {time}",level=2)
document.add_heading(f"Student Name (who plagued) : {student_name_1}",level=2)
document.add_heading(f"Student Name (whom plagued) : {student_name_2}",level=2)
document.add_heading(f"Word Matched Plagued in percentage : {word_plagued}",level=2)
document.add_heading(f"Synonyms Matched Plagued in percentage : {syn_plagued}",level=2)

document.add_heading('Matched Words:', level=1)
document.add_paragraph(f'{list_l}') # , style='Intense Quote')

document.add_heading('Matched Synonyms:', level=1)
document.add_paragraph(f'{list_l}') # , style='Intense Quote')



# document.add_paragraph(
#     'first item in unordered list', style='List Bullet'
# )
# document.add_paragraph(
#     'first item in ordered list', style='List Number'
# )

# records = (
#     (3, '101', 'Spam'),
#     (7, '422', 'Eggs'),
#     (4, '631', 'Spam, spam, eggs, and spam')
# )

# table = document.add_table(rows=1, cols=3)
# hdr_cells = table.rows[0].cells
# hdr_cells[0].text = 'Qty'
# hdr_cells[1].text = 'Id'
# hdr_cells[2].text = 'Desc'
# for qty, id, desc in records:
#     row_cells = table.add_row().cells
#     row_cells[0].text = str(qty)
#     row_cells[1].text = id
#     row_cells[2].text = desc

document.add_page_break()

document.save('demo.docx')