# Non-negative Matrix Factorization (NMF)

* Install the [Thunder](http://docs.thunder-project.org/introduction) packages using the following commands:
    * `pip install thunder-python`
    * `pip install thunder-extraction`
* Download the test dataset and run this code.
* The NMF code below is a brief example implemented by the [Freeman lab](https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113) for this very dataset.

## Building a class to download and set up the data
* The class checks whether the data folder is present and, if it is not, downloads the data and sets it up.


In [None]:
import requests
import zipfile
import subprocess
import os

# Project root directory; it is also used as a prefix when building the
# location of the downloaded zip archives.
path = 'e:/project3/'
# Make the project root the working directory so that the relative paths used
# later ("data", the zip folder, the submission file) resolve inside it.
# os.chdir raises an OSError if the directory does not exist.
os.chdir(path)


class Loader:
    """
    Download the project archives and unpack them into the data folders.

    On construction the loader looks for a ``data`` folder in the current
    working directory.  When it is missing, the zip archives are fetched with
    ``gsutil`` into ``zip_dir`` (unless that folder already holds files) and
    every archive found there is extracted: archives whose file name ends in
    ``test.zip`` go to ``test_path``, all other archives go to ``train_path``.

    Attributes:
        download_link: Cloud Storage location the archives are copied from.
        train_path: Folder that receives the contents of the training archives.
        test_path: Folder that receives the contents of the test archives.
    """

    # Folder that holds the downloaded zip archives.  It is relative to the
    # current working directory, like the "data" check in __init__; one single
    # name is used for the existence check, the download target and the
    # extraction source so the three steps can no longer disagree.
    zip_dir = 'download_zip'

    def __init__(self, download_link='gs://uga-dsp/project3/'):
        """
        Record the paths and prepare the data if it is not present yet.

        Args:
            download_link: Cloud Storage URL the archives are copied from.
        """
        self.download_link = download_link
        self.train_path = self.get_train_path()
        self.test_path = self.get_test_path()

        if os.path.exists("data"):
            print('Data Folder is ready')
        else:
            self.download_data(download_link)

    def download_data(self, download_link):
        """
        Fetch the archives (if needed) and extract them.

        The download is skipped when ``zip_dir`` already contains files, so
        an interrupted extraction can be resumed without downloading again.

        Args:
            download_link: Cloud Storage URL the archives are copied from.

        Raises:
            RuntimeError: If ``gsutil`` exits with a non-zero status.
            OSError: If the ``gsutil`` executable cannot be started.
        """
        already_downloaded = (os.path.isdir(self.zip_dir)
                              and bool(os.listdir(self.zip_dir)))
        if not already_downloaded:
            print("==> Downloading data")
            os.makedirs(self.zip_dir, exist_ok=True)
            # NOTE(review): the previous command had no destination argument.
            # This assumes the zip archives sit directly under download_link;
            # confirm against the bucket layout.
            source = download_link.rstrip('/') + '/*'
            # An argument list (no shell) keeps the URL from being interpreted
            # by a shell; the wildcard is expanded by gsutil itself.
            status = subprocess.call(
                ['/usr/bin/gsutil', '-m', 'cp', '-r', source, self.zip_dir])
            if status != 0:
                raise RuntimeError(
                    'gsutil exited with status %d while copying %s'
                    % (status, source))
        self.extract_zip_data()

    def extract_zip_data(self):
        """
        Extract every zip archive found in ``zip_dir``.

        Archives whose name ends in ``test.zip`` are unpacked into
        ``self.test_path``; all others into ``self.train_path``.  Entries
        that are not zip files are skipped.
        """
        for name in sorted(os.listdir(self.zip_dir)):
            archive = os.path.join(self.zip_dir, name)
            if not zipfile.is_zipfile(archive):
                continue
            print("Extracting "+name)
            # Use the instance's own paths: the module-level loader object
            # does not exist yet while the constructor is still running.
            if name.endswith('test.zip'):
                target = self.test_path
            else:
                target = self.train_path
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(archive, 'r') as zip_ref:
                zip_ref.extractall(target)

    def get_train_path(self):
        """Return the folder that receives the training data."""
        return 'e:/project3/data/train/'

    def get_test_path(self):
        """Return the folder that receives the test data."""
        return 'e:/project3/data/test/'

In [None]:
import json
import thunder as td
from extraction import NMF
import os

# Module-level loader shared by the code below. Constructing it starts the
# download/extract step unless a "data" folder already exists in the
# working directory.
d1=Loader()


def matrix_factorisation():
    """
    Run the NMF extraction on every test dataset and save the found regions.

    Each entry of ``d1.test_path`` is treated as one dataset whose tiff
    images live in its ``images`` sub-folder.  For every dataset an NMF model
    is fitted, merged using the configured ``overlap`` value, and the
    coordinates of the resulting regions are collected.  The collected
    results are written as JSON to ``submission3.json`` in the current
    working directory.
    """
    dataset_names = os.listdir(d1.test_path)
    results = []

    # Keyword arguments passed to the NMF constructor.
    nmf_settings = {'k': 5, 'percentile': 99, 'max_iter': 50, 'overlap': 0.1}
    # Keyword arguments passed to fit().
    fit_settings = {'chunk_size': (50, 50), 'padding': (25, 25)}

    for name in dataset_names:
        print('processing dataset: %s' % name)
        print('loading')
        images = td.images.fromtif(d1.test_path + name + '/images', ext='tiff')

        print('analyzing')
        extractor = NMF(**nmf_settings)
        fitted = extractor.fit(images, **fit_settings)
        merged = fitted.merge(nmf_settings['overlap'])

        print('found %g regions' % merged.regions.count)

        # One {'coordinates': [...]} entry per region, in iteration order.
        region_entries = []
        for region in merged.regions:
            region_entries.append({'coordinates': region.coordinates.tolist()})

        results.append({'dataset': name, 'regions': region_entries})

    print('writing results')
    with open('submission3.json', 'w') as out_file:
        serialized = json.dumps(results)
        out_file.write(serialized)