In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import sys
from scipy.optimize import curve_fit, leastsq

#%matplotlib qt
#%matplotlib ipympl
%matplotlib notebook


In [76]:
def create_df(filename):
    df= pd.read_json(filename)
    return df

def filter_data(df):    
    mean = df['price'].mean()
    std = df['price'].std()
    lower_range = mean - 2*std
    upper_range = mean + 2*std
    mask = (df['price'] >= lower_range) & (df['price'] <= upper_range)
    print(len(df['price']))
    df = df.loc[mask]
    print(len(df['price']))

    df['departure_date'] = pd.to_datetime(df['departure_date'].str[:10])
    df = df.sort_values(by='price')
    df.index = df['departure_date']
    return df
    
def sort_by_date(df):
    dates = pd.DatetimeIndex(df['departure_date'].sort_values().unique())
    print(dates)
    df2 = pd.DataFrame(index=dates, columns=['price', 'seats_available'])
    for i in df['departure_date'].unique():
        
        quantile = pd.Series(df.loc[i, 'price']).quantile(q=0.15, interpolation = 'lower')
        idx = np.where(quantile == df.loc[i, 'price'])[0][0]
        print(idx)
        new_row = {'price': quantile, 'seats_available': df.loc[i, 'seats_available'][idx]}
        df2.loc[i] = new_row

    return df2



In [None]:
df_names = ['OPO_to_BHX_oneway']
dict_df = {}
small_df = {}
for df_name in df_names:
    
    dict_df[df_name] = create_df(df_name+'.json')
    dict_df[df_name] = filter_data(dict_df[df_name])
    small_df[df_name] = sort_by_date(dict_df[df_name])



In [78]:
dict_df['OPO_to_BHX_oneway']

Unnamed: 0_level_0,id,quality,price,airlines,departure_duration,departure_date,routecount,seats_available
departure_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-05-25,093e25584c2d0000c59d16ff_0|25581b7c4c2e0000cc4...,677.587331,125,[FR],15.250000,2023-05-25,2,
2023-05-11,093e25584c1f000045d79ab4_0|25581b7c4c20000083d...,679.852749,127,[FR],15.250000,2023-05-11,2,6.0
2023-12-18,093e01af4cfc00003f51c078_0|01af1b7c4cfd0000c8d...,697.315645,133,[VY],15.583333,2023-12-18,2,5.0
2023-12-15,093e01af4cf90000118b3246_0|01af1b7c4cfa000084e...,723.982245,133,[VY],16.416667,2023-12-15,2,5.0
2023-06-20,093e22ad4c480000438b3ad4_0|22ad1b7c4c48000002d...,448.513783,137,"[FR, U2]",7.666667,2023-06-20,2,5.0
...,...,...,...,...,...,...,...,...
2023-08-21,093e1b7c4c86000015f6d2d9_0|093e1b7c4c86000015f...,2442.717867,1947,[LH],5.916667,2023-08-21,2,
2023-04-14,093e10e84c050000663a33f0_0|10e81b7c4c0500004ff...,2493.384407,1947,[KL],7.500000,2023-04-14,2,
2023-08-23,093e060c4c8700005d42c452_0|060c1b7c4c884c8cb9b...,2667.850016,1948,"[KL, LG]",12.916667,2023-08-23,3,
2023-08-02,093e1b7c4c730000cad184de_0|093e1b7c4c730000cad...,2435.449355,1950,[LX],5.583333,2023-08-02,2,9.0


### This creates a line of best fit using a series of sine waves

In [12]:
def return_val(dict_df, df_name):
    df = dict_df[df_name]
    y = df['price']
    x = df.index
    x_line = np.array(x.astype(int) / 10**9)
    # Finding the amplitude of the sin waves
    amp = abs(np.fft.fft(y))
    #Sorting the indices of the amplitudes in descending order
    indices = np.flip(np.argsort(amp))
    # Finding the frequency of the sin waves
    freq = np.fft.fftfreq(len(x_line), (x_line[1]-x_line[0]))
    guess_amp = np.std(y) * 2**0.5
    guess_freq = abs(freq[np.argmax(amp[1:])+1])
    phase = 0
    guess_offset = np.mean(y) * 2**0.5
    guess = [guess_amp, 2*np.pi*guess_freq, phase,  guess_offset]
    return x, y, x_line, amp, freq, phase, guess_offset, indices


def sinfunc(x, a, w, p):
    return a * np.sin(x*w+p)
def est_param(x_line, y, a, w, p, c, indices):
    est_amps = np.empty(len(a))
    est_freq = np.empty(len(a))
    est_phase = np.empty(len(a))
    for i in indices:
        popt, pcov= curve_fit(sinfunc, x_line, y,  p0 = [a[i], w[i], p])
        est_amps[i] = popt[0]
        est_freq[i] = popt[1]
        est_phase[i] = popt[2]
        

    est_values = [est_amps, est_freq, est_phase]
    return est_values

def model_based_on_param(x_line,est_values, degree):
    x_line_dense = np.linspace(x_line.min(), x_line.max(), 4*len(x_line))
    x_dense = pd.to_datetime(x_line_dense, unit='s')
    y_dense = np.zeros(shape=len(x_line_dense))

    ind = np.argpartition(est_values[0], -degree)[-degree:]
    for i in ind:
        y_dense += sinfunc(x_line_dense, est_values[0][i], est_values[1][i],est_values[2][i]) 
    return x_dense, y_dense

def plot_graph_fourier(x_line, y_line, a, b, x, y, df_name, ax, colour):
    
    ax.plot(x_line,y_line*a+b, label = df_name, color = colour)
    #ax.scatter(x, y, color = colour, marker='.',label = df_name)
    
    ax.legend(fontsize=12)
    ax.set_title('Price of flights in the bottom 15% for 4 adults')



In [None]:
fig, ax = plt.subplots(figsize = (12, 6))
colours = ['red', 'green', 'blue']
for i in range(len(df_names)):
    x, y, x_line, amp, freq, phase,  guess_offset, indices= return_val(small_df,df_names[i])
    guess = [amp, 2*np.pi*freq, phase,  guess_offset]
    est_values= est_param(x_line,y, *guess, indices=indices)
    x_line, y_line = model_based_on_param(x_line,est_values, 7)
    plot_graph_fourier(x_line, y_line, 0.5, 0, x, y, df_names[i], ax, colours[i])

In [17]:

def plot(small_df,filename,  ax, colour):
    x = small_df[filename].index
    y = small_df[filename]['price']
    x_line= x.astype(int) / 10**9
    y = y.astype(int)
    x_line_dense = np.linspace(x_line.min(), x_line.max(), 4*len(x_line))
    x_dense = pd.to_datetime(x_line_dense, unit='s')

    p= np.polyfit(x_line, y, 4)
    y_line = np.polyval(p,x_line_dense)
    ax.plot(x_dense, y_line, label=filename, color=colour)

    ax.set_ylabel('Price in GBP')
    #ax.set_yticks(np.arange(0, np.max(y)+1, 100))
    ax.set_xlabel('Date')
    ax.set_title('The price of a Oneway flight on each of the day of the year for 4 adults( adult > 12y/o)')
    ax.scatter(x, y, marker ='.', color=colour, label=filename)
    ax.legend(fontsize=12)
    

fig, ax = plt.subplots(figsize =(12, 6))
#plot(BHX_to_IAS2,'BHX_to_IAS', ax, colour='red')
plot(small_df,'OPO_to_BHX_oneway', ax, colour='blue')
#plot(LTN_to_IAS2,'LTN_to_IAS', ax, colour='green')
plt.show()


<IPython.core.display.Javascript object>

  x_line= x.astype(int) / 10**9
  plot(small_df,'OPO_to_BHX_oneway', ax, colour='blue')


In [10]:
import mpld3
def plot_with_hover(df):
    x = df.index
    y = df['price']
    c = np.random.randint(1,5,size=15)
    norm = plt.Normalize(1,4)
    cmap = plt.cm.RdYlGn
    names = np.array(list("ABCDEFGHIJKLMNO"))

    fig,ax = plt.subplots()
    sc = plt.scatter(x,y)

    annot = ax.annotate("", xy=(0,0), xytext=(20,20),textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="w"),
                        arrowprops=dict(arrowstyle="->"))
    annot.set_visible(False)

    def update_annot(ind):
        
        pos = sc.get_offsets()[ind["ind"][0]]
        annot.xy = pos
        text = "{}".format(" ".join(list(map(str,y[ind["ind"]]))))
        annot.set_text(text)
        #annot.get_bbox_patch().set_facecolor(cmap(norm(c[ind["ind"][0]])))
        #annot.get_bbox_patch().set_alpha(0.4)
        

    def hover(event):
        vis = annot.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", hover)
    html_str = mpld3.fig_to_html(fig)
    html_file= open("htmltest1.html","w")
    html_file.write(html_str)
    html_file.close()


In [None]:
plot_with_hover(small_df['OPO_to_BHX_oneway'])

In [3]:
class LineBuilder:
    def __init__(self, line):
        self.line = line
        self.xs = list(line.get_xdata())
        self.ys = list(line.get_ydata())
        self.cid = line.figure.canvas.mpl_connect('button_press_event', self)

    def __call__(self, event):
        print('click', event)
        if event.inaxes!=self.line.axes: return
        self.xs.append(event.xdata)
        self.ys.append(event.ydata)
        self.line.set_data(self.xs, self.ys)
        self.line.figure.canvas.draw()

fig, ax = plt.subplots()
ax.set_title('click to build line segments')
line, = ax.plot([0], [0])  # empty line
linebuilder = LineBuilder(line)

plt.show()
plt.savefig('Test1.svg')

click button_press_event: xy=(318, 366) xydata=(-0.00221774193548388, 0.03821428571428573) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(275, 266) xydata=(-0.011754032258064526, 0.008452380952380961) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(212, 254) xydata=(-0.02572580645161291, 0.004880952380952389) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(385, 222) xydata=(0.012641129032258058, -0.004642857142857143) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(444, 319) xydata=(0.025725806451612904, 0.024226190476190484) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(352, 175) xydata=(0.005322580645161279, -0.01863095238095238) button=1 dblclick=False inaxes=AxesSubplot(0.125,0.11;0.775x0.77)
click button_press_event: xy=(237, 346) xydata=(-0.

In [1]:

def plot_with_hover_pyqt6(self):

        self.seats_available = np.array(self.big_df['seats_available']).astype(int)

        fig,ax = plt.subplots(figsize = (12, 6))
        sc = plt.scatter(self.x,self.y)

        annot = ax.annotate("", xy=(0,0), xytext=(20,20),textcoords="offset points",
                            bbox=dict(boxstyle="round", fc="w"),
                            arrowprops=dict(arrowstyle="->"))
        annot.set_visible(False)

        def update_annot(ind):
            
            pos = sc.get_offsets()[ind["ind"][0]]
            annot.xy = pos
            text = "{}£\n{} seats available\n {}".format(
                                " ".join(list(map(str,self.y[ind["ind"]]))),
                                " ".join(list(map(str,self.seats_available[ind["ind"]]))),
                                " ".join(list(map(str,self.x[ind["ind"]]))))
            annot.set_text(text)
            #annot.get_bbox_patch().set_facecolor(cmap(norm(c[ind["ind"][0]])))
            #annot.get_bbox_patch().set_alpha(0.4)
            

        def hover(event):
            vis = annot.get_visible()
            if event.inaxes == ax:
                cont, ind = sc.contains(event)
                if cont:
                    update_annot(ind)
                    annot.set_visible(True)
                    fig.canvas.draw_idle()
                else:
                    if vis:
                        annot.set_visible(False)
                        fig.canvas.draw_idle()




        fig.canvas.mpl_connect("motion_notify_event", hover)
        plt.show()

In [None]:
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from io import BytesIO


ET.register_namespace("", "http://www.w3.org/2000/svg")

fig, ax = plt.subplots()

# Create patches to which tooltips will be assigned.
rect1 = plt.Rectangle((10, -20), 10, 5, fc='blue')
rect2 = plt.Rectangle((-20, 15), 10, 5, fc='green')

shapes = [rect1, rect2]
labels = ['This is a blue rectangle.', 'This is a green rectangle']

for i, (item, label) in enumerate(zip(shapes, labels)):
    patch = ax.add_patch(item)
    annotate = ax.annotate(labels[i], xy=item.get_xy(), xytext=(0, 0),
                           textcoords='offset points', color='w', ha='center',
                           fontsize=8, bbox=dict(boxstyle='round, pad=.5',
                                                 fc=(.1, .1, .1, .92),
                                                 ec=(1., 1., 1.), lw=1,
                                                 zorder=1))

    ax.add_patch(patch)
    patch.set_gid(f'mypatch_{i:03d}')
    annotate.set_gid(f'mytooltip_{i:03d}')

# Save the figure in a fake file object
ax.set_xlim(-30, 30)
ax.set_ylim(-30, 30)
ax.set_aspect('equal')

f = BytesIO()
plt.savefig(f, format="svg")

# --- Add interactivity ---

# Create XML tree from the SVG file.
tree, xmlid = ET.XMLID(f.getvalue())
tree.set('onload', 'init(event)')

for i in shapes:
    # Get the index of the shape
    index = shapes.index(i)
    # Hide the tooltips
    tooltip = xmlid[f'mytooltip_{index:03d}']
    tooltip.set('visibility', 'hidden')
    # Assign onmouseover and onmouseout callbacks to patches.
    mypatch = xmlid[f'mypatch_{index:03d}']
    mypatch.set('onmouseover', "ShowTooltip(this)")
    mypatch.set('onmouseout', "HideTooltip(this)")

# This is the script defining the ShowTooltip and HideTooltip functions.
script = """
    <script type="text/ecmascript">
    <![CDATA[

    function init(event) {
        if ( window.svgDocument == null ) {
            svgDocument = event.target.ownerDocument;
            }
        }

    function ShowTooltip(obj) {
        var cur = obj.id.split("_")[1];
        var tip = svgDocument.getElementById('mytooltip_' + cur);
        tip.setAttribute('visibility', "visible")
        }

    function HideTooltip(obj) {
        var cur = obj.id.split("_")[1];
        var tip = svgDocument.getElementById('mytooltip_' + cur);
        tip.setAttribute('visibility', "hidden")
        }

    ]]>
    </script>
    """

# Insert the script at the top of the file and save it.
tree.insert(0, ET.XML(script))
ET.ElementTree(tree).write('svg_tooltip.svg')

In [8]:
import plotly.express as px


In [18]:
x = small_df['OPO_to_BHX_oneway'].index
y = small_df['OPO_to_BHX_oneway']['price']
fig = px.scatter(x=x, y=y)
fig.write_html('D:\COding\Python\Python web scraping\Flight tickets\Airfare-flights KIWI API\Graphs\Plotly graphs\Test1 Interactive plot.html')

In [31]:
small_df['OPO_to_BHX_oneway']

Unnamed: 0_level_0,price,seats_available
departure_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-17,departure_date 2023-04-17 1243 2023-04-17 ...,departure_date 2023-04-17 4.0 2023-04-17 ...
2023-04-18,departure_date 2023-04-18 217 2023-04-18 ...,departure_date 2023-04-18 6.0 2023-04-18 ...
2023-04-19,departure_date 2023-04-19 590 2023-04-19 ...,departure_date 2023-04-19 9.0 2023-04-19 ...
2023-04-20,departure_date 2023-04-20 209 2023-04-20 ...,departure_date 2023-04-20 6.0 2023-04-20 ...
2023-04-21,departure_date 2023-04-21 398 2023-04-21 ...,departure_date 2023-04-21 5.0 2023-04-21 ...
...,...,...
2024-01-11,departure_date 2024-01-11 409 2024-01-11 ...,departure_date 2024-01-11 6.0 2024-01-11 ...
2024-01-12,departure_date 2024-01-12 457 2024-01-12 ...,departure_date 2024-01-12 6.0 2024-01-12 ...
2024-01-13,departure_date 2024-01-13 288 2024-01-13 ...,departure_date 2024-01-13 6.0 2024-01-13 ...
2024-01-14,departure_date 2024-01-14 299 2024-01-14 ...,departure_date 2024-01-14 6.0 2024-01-14 ...
