# difflib

## Differ

### Differ class methods and variables

In [1]:
from difflib import Differ

# Public attribute names of the Differ class (no leading underscore).
d = [name for name in dir(Differ) if not name.startswith('_')]

print('These are Variables of Differ module:')
print('*'*35)
# Non-callable public attributes are reported as variables.
for x in d:
    if not callable(getattr(Differ, x)):
        print(x)
# Blank separator line. This used to hang off a `for ... else`; that clause
# runs whenever the loop ends without `break` (here: always), so a plain
# statement prints the same thing without suggesting a condition.
print()

print('These are Methods of Differ module:')
print('*'*35)
# Callable public attributes are reported as methods.
for x in d:
    if callable(getattr(Differ, x)):
        print(x)

These are Variables of Differ module:
***********************************

These are Methods of Differ module:
***********************************
compare


### Differ Methods

### compare

### Comparing two files

In [5]:
# Print the contents of file2.txt by running `cat` through the IPython shell escape.
!cat /home/mana/Work/file2.txt

This is file2 lists:
1. red Apple
2. Green Apple
3. white Apple


In [4]:
# Print the contents of file1.txt by running `cat` through the IPython shell escape.
!cat /home/mana/Work/file1.txt

This is file1 list:
1. Red Apple
2. Green Aple


In [8]:
from pathlib import Path

# Read both files in full, then break each into a list of lines
# (splitlines drops the line terminators).
text1 = Path('/home/mana/Work/file1.txt').read_text()
text2 = Path('/home/mana/Work/file2.txt').read_text()
f1, f2 = text1.splitlines(), text2.splitlines()

# compare() returns an iterator of delta lines; joining them with newlines
# prints one delta line per output row.
diff = Differ().compare(f1, f2)
print('\n'.join(diff))

- This is file1 list:
?             ^

+ This is file2 lists:
?             ^     +

- 1. Red Apple
?    ^

+ 1. red Apple
?    ^

- 2. Green Aple
+ 2. Green Apple
?            +

  3. white Apple


In [6]:
# Print the contents of sample1.txt by running `cat` through the IPython shell escape.
!cat /home/mana/Work/sample1.txt

I need to buy apples.
I need to run the laundry.
I need to wash the dog.
I need to get the car detailed.


In [7]:
# Print the contents of sample2.txt by running `cat` through the IPython shell escape.
!cat /home/mana/Work/sample2.txt

I need to do the laundry.
I need to buy apples.
I need to wash the car.
I need to get the dog detailed.


### **How to find identical lines?**

In [8]:
from difflib import Differ
from pathlib import Path

# Both sample files live in the same working directory.
P = Path('/home/mana/Work')
f1 = P.joinpath('sample1.txt').read_text().splitlines()
f2 = P.joinpath('sample2.txt').read_text().splitlines()

diff = Differ().compare(f1, f2)
# Delta lines that begin with a blank are the ones present in both inputs;
# the '-', '+' and '?' lines are filtered out.
same_lines = list(filter(lambda line: line.startswith(' '), diff))
print(*same_lines)

  I need to buy apples.


### **ndiff**

In [9]:
from difflib import ndiff
from pathlib import Path

# Both sample files live in the same working directory.
P = Path('/home/mana/Work')
f1 = P.joinpath('sample1.txt').read_text().splitlines()
f2 = P.joinpath('sample2.txt').read_text().splitlines()

# Materialise the delta iterator so the cell displays it as a list.
list(ndiff(f1, f2))


['+ I need to do the laundry.',
 '  I need to buy apples.',
 '- I need to run the laundry.',
 '- I need to wash the dog.',
 '?                    ^^^\n',
 '+ I need to wash the car.',
 '?                    ^^^\n',
 '- I need to get the car detailed.',
 '?                   ^^^\n',
 '+ I need to get the dog detailed.',
 '?                   ^^^\n']

### **How to find different lines?**

In [4]:
from difflib import ndiff
from pathlib import Path

P = Path('/home/mana/Work')
f1 = (P/'sample1.txt').read_text().splitlines()
f2 = (P/'sample2.txt').read_text().splitlines()
diff = ndiff(f1, f2)

# Every delta line starts with a two-character code; '- ' marks a line that is
# in the first file but has no unchanged counterpart in the second.
# Slice the code off instead of using str.replace('- ', ''): replace() would
# also delete any "- " that occurs inside the line's own text.
delta = [x[2:] for x in diff if x.startswith('- ')]

print(*delta, sep = '\n')

I need to run the laundry.
I need to wash the dog.
I need to get the car detailed.


## SequenceMatcher

In [1]:
from difflib import SequenceMatcher

w1 = 'lease extension'
w2 = 'extension lease'

# Search the full extent of both strings for their longest common run.
word_match = SequenceMatcher(isjunk=None, a=w1, b=w2)
wm = word_match.find_longest_match(alo=0, ahi=len(w1), blo=0, bhi=len(w2))

# wm.a is where the run starts in w1 and wm.size is its length.
start, stop = wm.a, wm.a + wm.size
print(w1[start:stop])

extension


In [2]:
from difflib import SequenceMatcher
w1 = 'lease extension'
w2 = 'extension lease'
word_match = SequenceMatcher(None,w1,w2).get_matching_blocks()

# get_matching_blocks() always ends with a zero-length sentinel triple
# (len(a), len(b), 0). Skip it, otherwise it is printed as an empty line.
matched_parts = []
for wm in word_match:
    if wm.size:
        matched_parts.append(w1[wm.a:wm.a+wm.size])

for part in matched_parts:
    print(part)

extension



### How to list close matches with get_close_matches?

In [1]:
from difflib import get_close_matches

# Candidate package file names to search through.
strs = [
    '389-ds-base-1.3.9.1-10.el7.x86_64.rpm',
    'GeoIP-1.5.0-15.el7.x86_64.rpm',
    '389-ds-base-devel-1.3.9.1-10.el7.x86_64.rpm',
    'Cython-0.19-4.el7.x86_64.rpm',
    'Cython-0.19-5.el7.x86_64.rpm',
    'GeoIP-1.5.0-14.el7.x86_64.rpm',
]

# Return at most two candidates whose similarity score to 'GeoIP' is >= 0.2.
closest = get_close_matches('GeoIP', strs, n=2, cutoff=0.2)
print(closest)

['GeoIP-1.5.0-15.el7.x86_64.rpm', 'GeoIP-1.5.0-14.el7.x86_64.rpm']


### class HtmlDiff

### How to compare two lines? 

In [1]:
from difflib import HtmlDiff

line1 = ['Chennai is very hot.']
line2 = ['Chennai is very sweet.']

# Build the complete HTML page holding the side-by-side comparison table.
html = HtmlDiff().make_file(line1, line2)

# Write through a context manager so the file is flushed and closed
# deterministically; the previous open() handle was never closed.
# make_file() declares utf-8 in the page by default, so write with that encoding.
with open('/home/mana/Work/diff.html', 'w', encoding='utf-8') as report:
    print(html, file=report)

### How to compare two files?

In [2]:
from difflib import HtmlDiff
from pathlib import Path

f1 = Path('/home/mana/Work/file1.txt').read_text().splitlines()
f2 = Path('/home/mana/Work/file2.txt').read_text().splitlines()

# Build the complete HTML page holding the side-by-side comparison table.
html = HtmlDiff().make_file(f1, f2)

# Write through a context manager so the file is flushed and closed
# deterministically; the previous open() handle was never closed.
# make_file() declares utf-8 in the page by default, so write with that encoding.
with open('/home/mana/Work/diff.html', 'w', encoding='utf-8') as report:
    print(html, file=report)

## **Exercise**

#### **How to find the different and identical lines between two files?**

In [10]:
from difflib import Differ,ndiff
from pathlib import Path

P = Path('/home/mana/Work')
f1 = (P/'sample1.txt').read_text().splitlines()
f2 = (P/'sample2.txt').read_text().splitlines()

# Two independent delta iterators over the same pair of files: one consumed
# for the unchanged lines, the other for the removed lines.
same = Differ().compare(f1,f2)
different = ndiff(f1, f2)

# Every delta line starts with a two-character code: '  ' for a line found in
# both files, '- ' for a line only in the first file. Slice the code off
# rather than calling str.replace(), which would also strip any matching
# characters ("  " or "- ") that occur inside the line's own text.
same_lines = [x[2:] for x in same if x.startswith('  ')]
different_lines = [x[2:] for x in different
                   if x.startswith('- ')]

print("These lines are same: ")
print(*same_lines, sep = '\n')
print()
# Note: only lines from the first file are listed here; '+ ' lines (those
# unique to the second file) are not included.
print("These lines are different: ")
print(*different_lines, sep = '\n')

These lines are same: 
I need to buy apples.

These lines are different: 
I need to run the laundry.
I need to wash the dog.
I need to get the car detailed.
