In [None]:
# A starting program for assignment 4 - UFO Sightings.
# This program loads the data from a file, which contains over 80,000 records,
# and saves the information in a list of dictionaries, with one dictionary
# for each sighting.  The main function carries out a few data mining
# exercises on the data.

# Some functions are supplied, but you will need to add more so that the
# entire program runs without error and supplies the results as indicated.
# You can examine the code in main to find out the names of most, but not all,
# of the functions you should write.

# You may add more data mining exercises to main if you wish to do so - but
# will not get any bonus marks for this extra work. There is no fixed number of
# functions that you have to write - you need to decide yourself which functions
# make for the best program structure.

# Supplied
# Used by the readData function to parse a date string (m/d/yyyy or mm/dd/yyyy,
# optionally followed by a space and a time) into three integer date components.

def parseDate(dateString):
    """Parse the date portion of a sighting timestamp.

    The date is expected in the form m/d/yyyy or mm/dd/yyyy.  Anything after
    the first space (for example a time of day) is ignored.

    Returns a (year, month, day) tuple of ints, or (None, None, None) when
    the date portion is not 8 to 10 characters long or cannot be parsed.
    """
    # Only the text before the first space is treated as the date.
    date = dateString.partition(" ")[0]
    if not 7 < len(date) < 11:
        return None, None, None
    try:
        month, day, year = date.split("/")
        return int(year), int(month), int(day)
    except ValueError:
        # Wrong number of "/"-separated fields or a non-numeric field, e.g.
        # a header cell such as "datetime" that happens to be 8 characters.
        return None, None, None

# Supplied
# Reads the given file, which consists of sighting data in comma-delimited text format,
# with one sighting per line.  A dictionary record is created for each sighting
# and a list of all these dictionaries is returned.  The form of the data is in:
# datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
# Dates are in the format: mm/dd/yyyy, but could also be m/d/yyyy.
def readData(filename):
    """Read the comma-delimited sightings file into a list of dictionaries.

    Each line is split on commas; lines that do not yield exactly 11 fields
    are skipped.  Every returned dictionary has the keys sightedYear,
    sightedMonth, sightedDay, reportedYear, reportedMonth, reportedDay,
    country, state, city, shape, duration, description, latitude and
    longitude (in that order).

    Date components are ints, or None when parseDate cannot parse the field.
    duration is kept as the stripped text of the "duration (seconds)" field.
    latitude and longitude are floats, or both None if either fails to
    convert.
    """
    data = []
    # The with-block guarantees the file is closed even if parsing raises.
    with open(filename) as ufoFile:
        for line in ufoFile:
            pieces = [piece.strip() for piece in line.strip().split(',')]
            if len(pieces) != 11:
                continue

            sightedYear, sightedMonth, sightedDay = parseDate(pieces[0])
            reportedYear, reportedMonth, reportedDay = parseDate(pieces[8])

            # Coordinates are accepted only as a pair.
            try:
                latitude = float(pieces[9])
                longitude = float(pieces[10])
            except ValueError:
                latitude = None
                longitude = None

            # pieces[6] (duration in hours/min) is intentionally not stored.
            data.append({
                "sightedYear": sightedYear,      # Date sighting occurred
                "sightedMonth": sightedMonth,
                "sightedDay": sightedDay,
                "reportedYear": reportedYear,    # Date sighting reported
                "reportedMonth": reportedMonth,
                "reportedDay": reportedDay,
                "country": pieces[3],            # Location, mostly uses 2 letter abbreviations
                "state": pieces[2],
                "city": pieces[1],
                "shape": pieces[4],              # UFO shape
                "duration": pieces[5],           # Duration of sighting in seconds
                "description": pieces[7],        # Free-text description of sighting - not complete
                "latitude": latitude,            # Location of sighting
                "longitude": longitude,
            })
    return data

# Write your functions here
def highestSightingsStates(data, country):
    """Print the sighting total for a country and its two busiest states.

    data is the list of sighting dictionaries and country is the value to
    match against each record's "country" entry.  Records whose state is "?"
    count towards the country total but are left out of the state ranking.
    Ties are resolved in favour of the state encountered first.  If fewer
    than two states are found, only the available lines are printed.
    """
    stateCounts = {}
    total = 0
    for sighting in data:
        if sighting["country"] != country:
            continue
        total += 1
        state = sighting["state"]
        if state != "?":
            stateCounts[state] = stateCounts.get(state, 0) + 1
    print(total, "sightings in", country)

    # sorted() is stable, so equal counts keep their first-seen order.
    ranked = sorted(stateCounts.items(), key=lambda item: item[1], reverse=True)
    labels = ("Highest state:", "Second highest state:")
    # zip() stops early when there are fewer than two states to report.
    for label, (state, count) in zip(labels, ranked):
        print(label, state, "with", count, "sightings.")

def uniqueValues(data, s):
    """Return the distinct values stored under key s, in first-seen order.

    data is a list of dictionaries that all contain the key s.  The values
    must be hashable (the strings, numbers and None held in sighting records
    all are).
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication without a linear scan per record.
    return list(dict.fromkeys(sighting[s] for sighting in data))
def frequencyCount(data, s, list):
    """Count how often each value of key s occurs in data.

    Returns a dictionary that maps str(value) to the number of records
    holding that value, with keys inserted in first-seen order.  Keys are
    strings so that None and numeric values can be sorted together.

    The list parameter is accepted so existing calls keep working; it is not
    used, because the counts are derived directly from data.
    """
    counts = {}
    for sighting in data:
        key = str(sighting[s])
        counts[key] = counts.get(key, 0) + 1
    return counts

def displayInOrder(dic):
    """Print a header followed by the entries of dic sorted by key.

    Each entry is printed on its own line as "key : value".  The keys must be
    mutually comparable (for example all strings).
    """
    print("Frequencies in key order:")
    for key in sorted(dic):
        print(key, ":", dic[key])
        
def highestFrequency(dic):
    """Return the key of dic that has the largest value.

    When several keys share the largest value, the one inserted first is
    returned.  Raises ValueError if dic is empty.
    """
    if not dic:
        raise ValueError("highestFrequency() requires a non-empty dictionary")
    # max() returns the first maximal key in iteration (insertion) order.
    return max(dic, key=dic.get)

def search(data, year, month, day):
    """Return the sightings that occurred on the given date.

    year, month and day may be ints or numeric strings (such as the text
    returned by input()); they are converted to ints before being compared
    with the integer sightedYear, sightedMonth and sightedDay entries of each
    record.  An empty list is returned when nothing matches or when any
    component is not a valid integer.
    """
    try:
        wanted = (int(year), int(month), int(day))
    except (TypeError, ValueError):
        # A non-numeric component cannot match any stored integer date.
        return []
    return [
        sighting for sighting in data
        if (sighting["sightedYear"],
            sighting["sightedMonth"],
            sighting["sightedDay"]) == wanted
    ]

# Supplied
def main() :
    """Load the sightings file and run the data mining exercises on it.

    Prints the per-country state summaries, the year, month and shape
    frequencies, the five most frequent shapes, and finally the sightings
    recorded for a date entered by the user.
    """
    data = readData("ufo_sightings.csv")
    if not data :
        print("Ooooops. Exiting!")
        return
    print(len(data), "sightings read from file.")

    # Output from first call to highestSightingsStates, with ? for removed digits:
    # ????? sightings in USA
    # Highest state: ?? with ???? sightings.
    # Second highest state: ?? with ???? sightings.
    highestSightingsStates(data, "us")  # for USA
    highestSightingsStates(data, "ca")  # for Canada

    # Output of following three lines of code with some years skipped:
    # Frequencies in key order:
    # 1906 : 1
    # 1910 : 2
    # 1916 : 1
    # 1920 : 1
    # 1925 : 1
    # .... (other lines)
    # 2012 : 7357
    # 2013 : 7037
    # 2014 : 2260

    allYears = uniqueValues(data, "sightedYear")
    yearFrequencies = frequencyCount(data, "sightedYear", allYears)
    displayInOrder(yearFrequencies)

    # Output is month followed by the count for that month for all 12 months in a year.
    print("\nMonth sighting counts:")
    allMonths = uniqueValues(data, "sightedMonth")
    monthFrequencies = frequencyCount(data, "sightedMonth", allMonths)
    for month in range(1, 13) :
        # frequencyCount keys are strings; a month with no sightings has no
        # entry, so report it as 0 rather than failing with a KeyError.
        print(month, ":", monthFrequencies.get(str(month), 0))

    # Output of following three lines of code with some shapes skipped:
    # Frequencies in key order:
    # changed : 1
    # changing : 1962
    # chevron : 952
    # cigar : 2057
    # .... (other lines)
    # teardrop : 750
    # triangle : 7865
    # unknown : 5584
    allShapes = uniqueValues(data, "shape")
    shapeFrequencies = frequencyCount(data, "shape", allShapes)
    displayInOrder(shapeFrequencies)
    # Displays the top five shapes going by frequency count from highest to
    # lowest, giving the shape and the frequency count for that shape.
    print("\nTop five shapes:")
    for i in range(1, 6) :
        if not shapeFrequencies :
            # Fewer than five distinct shapes were present in the data.
            break
        highest = highestFrequency(shapeFrequencies)
        print("No.", i, "\"", highest, "\" with", shapeFrequencies[highest], "sightings.")
        # Remove the current leader so the next pass finds the runner-up.
        del shapeFrequencies[highest]

    # Displays the search results for a particular date.
    print("\nSupply a date for which you wish to list the available sightings:")
    year = input("Enter year (4 digits): ")
    month = input("Enter month (1 or 2 digits): ")
    day = input("Enter day (1 or 2 digits): ")
    searchResults = search(data, year, month, day)
    print(len(searchResults), "sightings made on this day:")
    for result in searchResults:
        for key in result :
            print(key, "=", result[key])
        print()
    

# Run the program.
# NOTE(review): this call also executes if the file is imported as a module;
# consider an `if __name__ == "__main__":` guard if that is not wanted.
main()
