In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the flights table; low_memory=False makes pandas parse each column in
# one pass instead of in chunks.
df = pd.read_csv('flights.csv', low_memory=False)

# Drop every row where the departure time or the arrival time is missing.
df = df.dropna(subset=['DEPARTURE_TIME', 'ARRIVAL_TIME'])

# Remove columns that are not used by the analyses below.
df = df.drop(columns=['TAIL_NUMBER', 'FLIGHT_NUMBER', 'CANCELLATION_REASON'])

# Impute the remaining missing arrival delays with the column mean.
# The result is assigned back explicitly: calling fillna(inplace=True) on
# df['ARRIVAL_DELAY'] is chained assignment, which warns on recent pandas and
# leaves `df` unchanged when Copy-on-Write is enabled.
mean_arrival_delay = df['ARRIVAL_DELAY'].mean()
df['ARRIVAL_DELAY'] = df['ARRIVAL_DELAY'].fillna(mean_arrival_delay)


# Mean arrival delay per airline, ordered from the largest mean to the smallest.
average_delay = (
    df.groupby('AIRLINE', as_index=False)['ARRIVAL_DELAY']
    .mean()
    .sort_values(by='ARRIVAL_DELAY', ascending=False)
)

# One bar per airline, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(average_delay['AIRLINE'], average_delay['ARRIVAL_DELAY'], color='skyblue')
ax.set_title('Средняя задержка рейсов по авиакомпаниям')
ax.set_xlabel('Авиакомпания')
ax.set_ylabel('Средняя задержка (минуты)')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(axis='y')

fig.tight_layout()
plt.show()





# Minimum absolute mean delay (in minutes) an airport needs to be charted.
MIN_ABS_DELAY_MINUTES = 10

# Keep only rows whose origin airport code is exactly three uppercase letters.
# na=False makes missing or non-string codes count as "no match", so the mask
# is purely boolean; a mask containing NaN would make the indexing raise.
# NOTE(review): this overwrites `df`, so the day-of-week and distance sections
# further down this cell also run on the filtered rows — confirm that is intended.
df = df[df['ORIGIN_AIRPORT'].str.match(r'^[A-Z]{3}$', na=False)]

# Mean arrival delay per origin airport.
average_delay1 = df.groupby('ORIGIN_AIRPORT')['ARRIVAL_DELAY'].mean().reset_index()

# Keep airports whose mean delay exceeds the threshold in absolute value.
filtered_delay = average_delay1[
    average_delay1['ARRIVAL_DELAY'].abs() > MIN_ABS_DELAY_MINUTES
]

# Order the airports from the largest mean delay to the smallest.
filtered_delay = filtered_delay.sort_values(by='ARRIVAL_DELAY', ascending=False)

# Bar chart with one bar per remaining airport.
plt.figure(figsize=(12, 6))
plt.bar(filtered_delay['ORIGIN_AIRPORT'], filtered_delay['ARRIVAL_DELAY'], color='skyblue')
plt.xticks(rotation=90)
# The threshold shown in the title is taken from the same constant as the filter.
plt.title(
    'Средняя задержка рейсов для каждого аэропорта отправления '
    f'(абсолютное значение > {MIN_ABS_DELAY_MINUTES} минут)'
)
plt.xlabel('Аэропорт отправления')
plt.ylabel('Средняя задержка (минуты)')
plt.tight_layout()
plt.show()





# Mean arrival delay for each DAY_OF_WEEK value.
delay_by_day = df.groupby('DAY_OF_WEEK')['ARRIVAL_DELAY'].mean()

# Draw the bars on an explicitly created Axes; rot=0 keeps the tick labels horizontal.
fig, ax = plt.subplots(figsize=(10, 6))
delay_by_day.plot.bar(ax=ax, color='skyblue', rot=0)
ax.set(
    title='Средняя задержка прибытия по дням недели',
    xlabel='День недели',
    ylabel='Средняя задержка (минуты)',
)
ax.grid(axis='y')

plt.show()







# Rows without a distance cannot be assigned to a bucket.
df = df.dropna(subset=['DISTANCE'])

distance = df['DISTANCE']
arrival_delay = df['ARRIVAL_DELAY']

# Bucket label -> boolean row mask. The first bucket has no lower bound; the
# others are half-open intervals [lower, upper). Rows at 5000 or above match
# no bucket.
distance_masks = {
    '<1000': distance < 1000,
    '1000-2000': (distance >= 1000) & (distance < 2000),
    '2000-3000': (distance >= 2000) & (distance < 3000),
    '3000-4000': (distance >= 3000) & (distance < 4000),
    '4000-5000': (distance >= 4000) & (distance < 5000),
}

# Mean arrival delay per bucket, in the same order as the labels.
bucket_means = [arrival_delay[mask].mean() for mask in distance_masks.values()]

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(list(distance_masks), bucket_means, width=1.0, color='skyblue')
ax.set_title('Средняя задержка прибытия в зависимости от расстояния')
ax.set_xlabel('Расстояние (в милях)')
ax.set_ylabel('Средняя задержка (в минутах)')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(axis='y')

plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Reload the raw table so this cell does not depend on filtering done elsewhere.
df = pd.read_csv('flights.csv', low_memory=False)

# Rows without a distance cannot be assigned to a bucket.
df = df.dropna(subset=['DISTANCE'])

# Impute missing arrival delays with the column mean. The result is assigned
# back explicitly: fillna(inplace=True) on df['ARRIVAL_DELAY'] is chained
# assignment, which warns on recent pandas and leaves `df` unchanged when
# Copy-on-Write is enabled.
mean_arrival_delay = df['ARRIVAL_DELAY'].mean()
df['ARRIVAL_DELAY'] = df['ARRIVAL_DELAY'].fillna(mean_arrival_delay)

# Half-open buckets [lower, upper). The first edge is -inf so the first bucket
# is simply "below 1000"; rows at 5000 or above fall outside every bucket.
distance_edges = [float('-inf'), 1000, 2000, 3000, 4000, 5000]
distance_labels = ['<1000', '1000-2000', '2000-3000', '3000-4000', '4000-5000']
distance_bucket = pd.cut(
    df['DISTANCE'], bins=distance_edges, labels=distance_labels, right=False
)

# observed=False keeps empty buckets in the result (their mean is NaN), so the
# values always line up with distance_labels.
mean_delay_by_distance = df.groupby(distance_bucket, observed=False)['ARRIVAL_DELAY'].mean()

plt.figure(figsize=(12, 6))
plt.bar(distance_labels, mean_delay_by_distance.to_numpy(), width=1.0, color='skyblue')
plt.title('Средняя задержка прибытия в зависимости от расстояния')
plt.xlabel('Расстояние (в милях)')
plt.ylabel('Средняя задержка (в минутах)')
plt.xticks(rotation=45)
plt.grid(axis='y')

plt.show()



In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Reload the raw table so this cell does not depend on filtering done elsewhere.
df = pd.read_csv('flights.csv', low_memory=False)

# Rows without a departure time cannot be assigned to a time bin.
df = df.dropna(subset=['DEPARTURE_TIME'])

# Impute missing arrival delays with the column mean. The result is assigned
# back explicitly: fillna(inplace=True) on df['ARRIVAL_DELAY'] is chained
# assignment, which warns on recent pandas and leaves `df` unchanged when
# Copy-on-Write is enabled.
mean_arrival_delay = df['ARRIVAL_DELAY'].mean()
df['ARRIVAL_DELAY'] = df['ARRIVAL_DELAY'].fillna(mean_arrival_delay)

# Bin edges 0, 100, ..., 2400: 25 edges give 24 half-open bins [edge, edge + 100).
# The stop value is 2500 because np.arange excludes it; stopping at 2400 would
# leave out the last edge and with it the 2300 bin.
bins = np.arange(0, 2500, 100)
# One label per bin, built from the bin's lower edge (0 -> '0:00', 2300 -> '23:00').
labels = [f'{i//100}:{i%100:02d}' for i in bins[:-1]]

mean_delays = []

# Walk consecutive (lower, upper) edge pairs instead of indexing bins[i + 1],
# which ran past the end of the array on the last iteration.
# NOTE(review): a DEPARTURE_TIME of exactly 2400, if the data contains one,
# falls outside every bin — confirm how the source encodes midnight.
for lower, upper in zip(bins[:-1], bins[1:]):
    in_bin = (df['DEPARTURE_TIME'] >= lower) & (df['DEPARTURE_TIME'] < upper)
    mean_delay = df.loc[in_bin, 'ARRIVAL_DELAY'].mean()
    # An empty bin has a NaN mean; plot it as a zero-height bar.
    mean_delays.append(0 if np.isnan(mean_delay) else mean_delay)


plt.figure(figsize=(12, 6))
plt.bar(labels, mean_delays, width=0.6, color='skyblue')
plt.title('Средняя задержка прибытия в зависимости от времени отправления')
plt.xlabel('Время отправления (чч:мм)')
plt.ylabel('Средняя задержка (в минутах)')
plt.xticks(rotation=45)
plt.grid(axis='y')


plt.show()