This notebook documents how the queries that search and analyze directors were optimized step by step. For each query it records the version, the execution time (measured on the individual author's device), and the optimization strategies applied.

**Best directors overall**

| **Version** | **Execution Time** | **Optimization Strategies Utilized** |
|----------|----------|----------|
| Naive | 452 seconds | Naive solution, utilized GROUP BY and ORDER BY with the AVG function on (weightedRating) |
| First Update | 401 seconds | First optimization, utilized WITH to get a table of directors with at least 3 works |
| Second Update | 0.219 seconds | Second optimization, directors_dt was updated to have an avgWeightedRating column and validCount column to save on computation time |
| Optimized | 0.672 seconds (no improvement) | Final optimized version, an index was added on (avgWeightedRating, validCount) |


Naive 

In [None]:
%%sql

-- Naive ranking of directors by the mean weightedRating of their titles.
-- COUNT(column) skips NULL ratings, so only directors with at least
-- 3 rated titles survive the HAVING filter.
SELECT
    dir.nconst,
    dir.PrimaryName,
    AVG(ft.weightedRating) AS avgWeightedRating
FROM title_ft AS ft
INNER JOIN title_director_bridge AS bridge
    ON ft.tconst = bridge.tconst
INNER JOIN directors_dt AS dir
    ON bridge.nconst = dir.nconst
GROUP BY
    dir.nconst,
    dir.PrimaryName
HAVING COUNT(ft.weightedRating) >= 3
ORDER BY avgWeightedRating DESC;

First Optimization

In [None]:
%%sql

-- Aggregate on the bridge table first, then attach director names only for
-- the directors that already passed the 3-title threshold.
WITH perDirector AS (
  SELECT
      bridge.nconst,
      COUNT(*) AS titleCount,
      AVG(ft.weightedRating) AS avgWeightedRating
  FROM title_director_bridge AS bridge
  INNER JOIN title_ft AS ft
    ON ft.tconst = bridge.tconst
  WHERE ft.weightedRating IS NOT NULL  -- unrated titles are ignored entirely
  GROUP BY bridge.nconst
  HAVING COUNT(*) >= 3
)
SELECT
  dir.nconst,
  dir.PrimaryName,
  agg.avgWeightedRating,
  agg.titleCount
FROM perDirector AS agg
INNER JOIN directors_dt AS dir
  ON dir.nconst = agg.nconst
ORDER BY agg.avgWeightedRating DESC;

Second Optimization

In [None]:
%%sql 

-- Reset step so the cell can be re-run after the columns were added once.
-- NOTE(review): MySQL has no DROP COLUMN IF EXISTS, so this statement
-- presumably errors on a schema where the columns were never created - confirm.
ALTER TABLE directors_dt
  DROP COLUMN validCount,
  DROP COLUMN avgWeightedRating;

-- Precomputed per-director aggregates. validCount defaults to 0 and
-- avgWeightedRating stays NULL for directors without any rated title.
ALTER TABLE directors_dt
  ADD COLUMN validCount INT NOT NULL DEFAULT 0,
  ADD COLUMN avgWeightedRating DECIMAL(5,3) NULL;

-- Backfill both columns from the rated titles of each director.
UPDATE directors_dt AS dir
INNER JOIN (
  SELECT
      bridge.nconst,
      COUNT(ft.weightedRating) AS ratedTitles,
      AVG(ft.weightedRating)   AS meanRating
  FROM title_director_bridge AS bridge
  INNER JOIN title_ft AS ft
    ON ft.tconst = bridge.tconst
  WHERE ft.weightedRating IS NOT NULL
  GROUP BY bridge.nconst
) AS agg
  ON agg.nconst = dir.nconst
SET dir.validCount = agg.ratedTitles,
    dir.avgWeightedRating = agg.meanRating;

-- The ranking now reads the stored aggregates instead of re-joining title_ft.
SELECT
  dir.nconst,
  dir.primaryName,
  dir.avgWeightedRating,
  dir.validCount AS titleCount
FROM directors_dt AS dir
WHERE dir.avgWeightedRating IS NOT NULL
  AND dir.validCount >= 3
ORDER BY dir.avgWeightedRating DESC;

Final Optimization With Indexes

In [None]:
%%sql

-- Composite index over the two precomputed columns used by the ranking query.
-- NOTE(review): ADD INDEX with a fixed name presumably fails when this cell is
-- run a second time without dropping the index first - confirm.
ALTER TABLE directors_dt
  ADD INDEX idx_directors_rating_count (avgWeightedRating, validCount);

-- Same ranking query as the previous version, now with the index in place.
SELECT
  dir.nconst,
  dir.primaryName,
  dir.avgWeightedRating,
  dir.validCount AS titleCount
FROM directors_dt AS dir
WHERE dir.avgWeightedRating IS NOT NULL
  AND dir.validCount >= 3
ORDER BY dir.avgWeightedRating DESC;

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

**Directors by Genre and Type**

| **Version** | **Execution Time** | **Optimization Strategies Utilized** |
|----------|----------|----------|
| Naive | 119.672 seconds | Naive solution, utilized GROUP BY and ORDER BY with the AVG, SUM and COUNT functions on (weightedRating, genreID, and * respectively). Used WHERE statements to specify filters on (typeID) |
| First Update | 18.672 seconds | First optimization, utilized a subquery to specify the matches before joining the table |
| Second Update | 6.187 seconds | Second optimization, utilized genre names instead of genre ID, e.g. (Action, Adventure) instead of (17, 18) |
| Third Update | 5.625 seconds | Third optimization, utilized type names instead of typeID, e.g. (tvSeries) instead of (6) |
| Optimized | 5.203 seconds (minor difference) | Final optimized version, indexes were added on (genreID, tconst), (nconst, tconst), (genreName), (titleType), and (typeID, tconst, weightedRating) |
| Test Version | 14.25 seconds (significantly worse) | A genre bitmask was used but resulted in a slower time |

Naive 

In [None]:
%%sql

-- Naive version. The inner query builds one row per (title, director) and
-- counts how many of the two target genre IDs (17, 18) the title carries.
-- The outer query keeps rated rows of type 6 and averages them per director.
SELECT
  perTitle.nconst,
  perTitle.primaryName,
  COUNT(*) AS titleCount,
  AVG(perTitle.weightedRating) AS avgWeightedRating
FROM (
  SELECT
    ft.tconst,
    ft.weightedRating,
    ft.typeID,
    dir.nconst,
    dir.primaryName,
    -- a boolean sums as 0 or 1, so this is the number of target genres present
    SUM(gen.genreID IN (17,18)) AS genreMatches
  FROM title_ft AS ft
  INNER JOIN title_director_bridge AS bridge
    ON bridge.tconst = ft.tconst
  INNER JOIN directors_dt AS dir
    ON dir.nconst = bridge.nconst
  INNER JOIN title_genre_bridge AS tgb
    ON tgb.tconst = ft.tconst
  INNER JOIN genre_dt AS gen
    ON gen.genreID = tgb.genreID
  GROUP BY
    ft.tconst,
    ft.weightedRating,
    ft.typeID,
    dir.nconst,
    dir.primaryName
  HAVING genreMatches = 2  -- both target genres must be present
) AS perTitle
WHERE perTitle.typeID = 6 -- 6: tvSeries
  AND perTitle.weightedRating IS NOT NULL
GROUP BY
  perTitle.nconst,
  perTitle.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;


First Optimization

In [None]:
%%sql

-- First optimization. The qualifying titles are resolved from
-- title_genre_bridge alone before anything else is joined.
SELECT
    dir.nconst,
    dir.primaryName,
    COUNT(*) AS titleCount,
    AVG(ft.weightedRating) AS avgWeightedRating
FROM (
  -- IN accepts the target genres in any order, and the distinct count of 2
  -- requires both of them while still allowing extra genres on the title.
  SELECT tgb.tconst
  FROM title_genre_bridge AS tgb
  WHERE tgb.genreID IN (17, 18) -- 17: Action and 18: Adventure
  GROUP BY tgb.tconst
  HAVING COUNT(DISTINCT tgb.genreID) = 2
) AS genreHits
INNER JOIN title_ft AS ft
  ON ft.tconst = genreHits.tconst
INNER JOIN title_director_bridge AS bridge
  ON bridge.tconst = ft.tconst
INNER JOIN directors_dt AS dir
  ON dir.nconst = bridge.nconst
WHERE ft.weightedRating IS NOT NULL
  AND ft.typeID = 6 -- 6: tvSeries
GROUP BY
    dir.nconst,
    dir.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;

Second Optimization

In [None]:
%%sql 

-- Second optimization. Genres are selected by name through genre_dt instead
-- of by hard-coded genre IDs.
SELECT
    dir.nconst,
    dir.primaryName,
    COUNT(*) AS titleCount,
    AVG(ft.weightedRating) AS avgWeightedRating
FROM (
    -- Titles carrying every target genre, in any order, possibly among others.
    SELECT tgb.tconst
    FROM title_genre_bridge AS tgb
    INNER JOIN genre_dt AS gen
        ON gen.genreID = tgb.genreID
    WHERE gen.genreName IN ('Action','Adventure') -- target genres
    GROUP BY tgb.tconst
    HAVING COUNT(DISTINCT gen.genreName) = 2 -- must equal the number of targets
) AS genreHits
INNER JOIN title_ft AS ft
    ON ft.tconst = genreHits.tconst
INNER JOIN title_director_bridge AS bridge
    ON bridge.tconst = ft.tconst
INNER JOIN directors_dt AS dir
    ON dir.nconst = bridge.nconst
WHERE ft.weightedRating IS NOT NULL
  AND ft.typeID = 6 -- tvSeries
GROUP BY
    dir.nconst,
    dir.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;

Third Optimization

In [None]:
%%sql

-- Third optimization. The title type is also selected by name through
-- type_dt rather than by a hard-coded typeID.
SELECT
    dir.nconst,
    dir.primaryName,
    COUNT(*) AS titleCount,
    AVG(ft.weightedRating) AS avgWeightedRating
FROM (
    -- Titles carrying both target genres, in any order, possibly among others.
    SELECT tgb.tconst
    FROM title_genre_bridge AS tgb
    INNER JOIN genre_dt AS gen
        ON gen.genreID = tgb.genreID
    WHERE gen.genreName IN ('Action','Adventure')
    GROUP BY tgb.tconst
    HAVING COUNT(DISTINCT gen.genreName) = 2
) AS genreHits
INNER JOIN title_ft AS ft
    ON ft.tconst = genreHits.tconst
INNER JOIN type_dt AS ty
    ON ft.typeID = ty.typeID
INNER JOIN title_director_bridge AS bridge
    ON bridge.tconst = ft.tconst
INNER JOIN directors_dt AS dir
    ON dir.nconst = bridge.nconst
WHERE ft.weightedRating IS NOT NULL
  AND ty.titleType = 'tvSeries'
GROUP BY
    dir.nconst,
    dir.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;

Final Optimization With Indexes


In [None]:
%%sql

-- Secondary indexes covering the join and filter columns of the query below.
-- NOTE(review): each ADD INDEX uses a fixed name, so re-running this cell
-- without dropping the indexes presumably fails - confirm.
ALTER TABLE title_genre_bridge
  ADD INDEX idx_tgb_genre_tconst (genreID, tconst);

ALTER TABLE title_director_bridge
  ADD INDEX idx_tdb_nconst_tconst (nconst, tconst);

ALTER TABLE genre_dt
  ADD INDEX idx_genre_name (genreName);

ALTER TABLE type_dt
  ADD INDEX idx_type_title (titleType);

ALTER TABLE title_ft
  ADD INDEX idx_title_type (typeID, tconst, weightedRating);

-- Same query as the third optimization, now run with the indexes in place.
SELECT
    dir.nconst,
    dir.primaryName,
    COUNT(*) AS titleCount,
    AVG(ft.weightedRating) AS avgWeightedRating
FROM (
    -- Titles carrying both target genres, in any order, possibly among others.
    SELECT tgb.tconst
    FROM title_genre_bridge AS tgb
    INNER JOIN genre_dt AS gen
        ON gen.genreID = tgb.genreID
    WHERE gen.genreName IN ('Action','Adventure')
    GROUP BY tgb.tconst
    HAVING COUNT(DISTINCT gen.genreName) = 2
) AS genreHits
INNER JOIN title_ft AS ft
    ON ft.tconst = genreHits.tconst
INNER JOIN type_dt AS ty
    ON ft.typeID = ty.typeID
INNER JOIN title_director_bridge AS bridge
    ON bridge.tconst = ft.tconst
INNER JOIN directors_dt AS dir
    ON dir.nconst = bridge.nconst
WHERE ft.weightedRating IS NOT NULL
  AND ty.titleType = 'tvSeries'
GROUP BY
    dir.nconst,
    dir.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;

Test Version

In [None]:
%%sql
-- Test version. Each title gets its genre set encoded as a bitmask so the
-- genre filter becomes one bitwise comparison on title_ft.

-- genre_map assigns every genreID its own 0-based bit position.
DROP TABLE IF EXISTS genre_map;
CREATE TABLE genre_map (
  genreID INT PRIMARY KEY,
  bitpos TINYINT UNSIGNED NOT NULL,
  UNIQUE KEY ux_bitpos (bitpos)  -- two genres must never share a bit
) ENGINE=InnoDB;

INSERT INTO genre_map (genreID, bitpos)
SELECT genreID, ROW_NUMBER() OVER (ORDER BY genreID) - 1
FROM genre_dt
ORDER BY genreID;

-- Uncomment to reset the two columns before re-running this cell.
-- ALTER TABLE title_ft
--   DROP COLUMN genreCombo,
--   DROP COLUMN genreMask;

-- genreCombo is the readable genre list and genreMask the same set as bits.
-- genreMask is BIGINT UNSIGNED, so only bit positions 0 to 63 can be stored.
ALTER TABLE title_ft
  ADD COLUMN genreCombo VARCHAR(255) NULL,
  ADD COLUMN genreMask  BIGINT UNSIGNED NULL;

-- Backfill both columns. Titles without any genre row are not matched by the
-- inner joins and therefore keep NULL in both columns.
WITH
genresPerTitle AS (
  SELECT
      tgb.tconst,
      GROUP_CONCAT(g.genreName ORDER BY g.genreName SEPARATOR ', ') AS genreCombo
  FROM title_genre_bridge AS tgb
  JOIN genre_dt AS g
    ON g.genreID = tgb.genreID
  GROUP BY tgb.tconst
),
masks AS (
  SELECT
      tgb.tconst,
      BIT_OR(1 << gm.bitpos) AS comboMask
  FROM title_genre_bridge AS tgb
  JOIN genre_map AS gm
    ON gm.genreID = tgb.genreID
  GROUP BY tgb.tconst
)
UPDATE title_ft t
JOIN genresPerTitle gpt ON gpt.tconst = t.tconst
JOIN masks m ON m.tconst   = t.tconst
SET t.genreCombo = gpt.genreCombo,
    t.genreMask  = m.comboMask;

-- Build the required mask only when every requested genre exists. If one is
-- missing, the HAVING removes the single result row, the variable becomes NULL
-- and the filter below matches nothing. Without this guard a missing genre
-- would silently produce a weaker mask and match more titles than intended.
SET @requiredGenreMask := (
  SELECT BIT_OR(1 << gm.bitpos)
  FROM genre_dt g
  JOIN genre_map gm ON gm.genreID = g.genreID
  WHERE g.genreName IN ('Action','Adventure')
  HAVING COUNT(DISTINCT g.genreName) = 2
);


-- Directors with at least 3 rated tvSeries titles whose genre set contains
-- every bit of the required mask, ranked by mean weightedRating.
SELECT
    d.nconst,
    d.primaryName,
    COUNT(*) AS titleCount,
    AVG(t.weightedRating) AS avgWeightedRating
FROM title_ft AS t
JOIN type_dt AS td             ON td.typeID  = t.typeID
JOIN title_director_bridge tdb ON tdb.tconst = t.tconst
JOIN directors_dt d            ON d.nconst   = tdb.nconst
WHERE t.weightedRating IS NOT NULL
  AND td.titleType = 'tvSeries'
  AND (t.genreMask & @requiredGenreMask) = @requiredGenreMask
GROUP BY d.nconst, d.primaryName
HAVING COUNT(*) >= 3
ORDER BY avgWeightedRating DESC;


=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

**Analysis of Directors by Profession Combination**

| **Version** | **Execution Time** | **Optimization Strategies Utilized** |
|----------|----------|----------|
| Naive | 366 seconds | Naive solution, utilized GROUP BY and ORDER BY with the AVG, SUM, and COUNT functions on tables acquired from the WITH statement |
| First Update | 5.063 seconds | First optimization, utilized precalculated averages and rated works count instead of calculating for them |
| Optimized | 0.250 seconds (major improvement) | Final optimization, a "profession" bitmask was created to make it more "queriable and indexable" |
| Test Version | 2.016 seconds (significantly worse) | Test version, an index was added on (profComboMask) but resulted in a slower time |


Naive 

In [None]:
%%sql

-- One row per director holding the alphabetically ordered list of professions.
WITH professionsPerDirector AS (
  SELECT
      dpb.nconst,
      GROUP_CONCAT(prof.professionName ORDER BY prof.professionName SEPARATOR ', ') AS professionCombo
  FROM director_profession_bridge AS dpb
  INNER JOIN profession_dt AS prof
    ON prof.professionID = dpb.professionID
  GROUP BY dpb.nconst
),

-- One row per director holding the mean rating and the number of distinct
-- rated titles. Unrated titles are excluded before aggregating.
directorPerformances AS (
  SELECT
      bridge.nconst,
      AVG(ft.weightedRating) AS directorAverage,
      COUNT(DISTINCT ft.tconst) AS directorWorks
  FROM title_director_bridge AS bridge
  INNER JOIN title_ft AS ft
    ON ft.tconst = bridge.tconst
  WHERE ft.weightedRating IS NOT NULL
  GROUP BY bridge.nconst
)

-- Average of the per-director averages for every profession combination.
SELECT
  combos.professionCombo,
  AVG(perf.directorAverage) AS overallAverage,
  COUNT(*) AS directorCount,
  SUM(perf.directorWorks) AS totalWorks
FROM professionsPerDirector AS combos
INNER JOIN directorPerformances AS perf
  ON perf.nconst = combos.nconst
GROUP BY combos.professionCombo
HAVING
  COUNT(*) >= 30 -- at least 30 directors share the combination
  AND SUM(perf.directorWorks) >= 30 -- at least 30 works amongst all of them
ORDER BY
  overallAverage DESC,
  directorCount DESC;

First Optimization

In [None]:
%%sql

-- First optimization. The per-director average and rated-work count are read
-- from the precomputed directors_dt columns instead of being recalculated.
WITH professionsPerDirector AS (
  -- Alphabetically ordered profession list for each director.
  SELECT
      dpb.nconst,
      GROUP_CONCAT(prof.professionName ORDER BY prof.professionName SEPARATOR ', ') AS professionCombo
  FROM director_profession_bridge AS dpb
  INNER JOIN profession_dt AS prof
    ON prof.professionID = dpb.professionID
  GROUP BY dpb.nconst
)
SELECT
  combos.professionCombo,
  AVG(dir.avgWeightedRating) AS overallAverage,
  COUNT(*) AS directorCount,
  SUM(dir.validCount) AS totalWorks
FROM professionsPerDirector AS combos
INNER JOIN directors_dt AS dir
  ON dir.nconst = combos.nconst
WHERE dir.validCount > 0
  AND dir.avgWeightedRating IS NOT NULL
GROUP BY combos.professionCombo
HAVING
  COUNT(*) >= 30 -- at least 30 directors in the combo
  AND SUM(dir.validCount) >= 30 -- at least 30 works across them
ORDER BY
  overallAverage DESC,
  directorCount DESC;

Final Optimization With Bitmask

In [None]:
%%sql

-- Final optimization. Every profession combination of a director is stored on
-- directors_dt as a bitmask, so the analysis groups on one integer column.

-- profession_map assigns every professionID its own 0-based bit position.
DROP TABLE IF EXISTS profession_map;
CREATE TABLE profession_map (
  professionID INT PRIMARY KEY,
  bitpos TINYINT UNSIGNED NOT NULL,
  UNIQUE KEY ux_bitpos (bitpos)
) ENGINE=InnoDB;

INSERT INTO profession_map (professionID, bitpos)
SELECT professionID, ROW_NUMBER() OVER (ORDER BY professionID) - 1
FROM profession_dt
ORDER BY professionID;

-- Reset step so the cell can be re-run after the columns were added once.
-- NOTE(review): MySQL has no DROP COLUMN IF EXISTS, so this statement
-- presumably errors on a schema where the columns were never created - confirm.
ALTER TABLE directors_dt
  DROP COLUMN professionCombo,
  DROP COLUMN profComboMask;

-- professionCombo is the readable list and profComboMask the same set as bits.
-- profComboMask is BIGINT UNSIGNED, so only bit positions 0 to 63 can be stored.
ALTER TABLE directors_dt
  ADD COLUMN professionCombo VARCHAR(255) NULL,
  ADD COLUMN profComboMask BIGINT UNSIGNED NULL;

-- Backfill both columns. Directors without any profession row are not matched
-- by the inner joins and therefore keep NULL in both columns.
WITH
professionsPerDirector AS (
  SELECT
      dpb.nconst,
      GROUP_CONCAT(p.professionName ORDER BY p.professionName SEPARATOR ', ') AS professionCombo
  FROM director_profession_bridge AS dpb
  JOIN profession_dt AS p
    ON p.professionID = dpb.professionID
  GROUP BY dpb.nconst
),
masks AS (
  SELECT
      dpb.nconst,
      BIT_OR(1 << pm.bitpos) AS comboMask
  FROM director_profession_bridge AS dpb
  JOIN profession_map AS pm
    ON pm.professionID = dpb.professionID
  GROUP BY dpb.nconst
)
UPDATE directors_dt d
JOIN professionsPerDirector ppd ON ppd.nconst = d.nconst
JOIN masks m ON m.nconst = d.nconst
SET d.professionCombo = ppd.professionCombo,
    d.profComboMask = m.comboMask;

-- Average of the per-director averages for every profession combination.
-- Directors with a NULL mask have no recorded profession. They are filtered
-- out so that they do not form a group of their own, which keeps the result
-- comparable with the earlier versions that inner-join the profession list.
SELECT
  d.profComboMask,
  AVG(d.avgWeightedRating) AS overallAverage,
  COUNT(*)                 AS directorCount,
  SUM(d.validCount)        AS totalWorks
FROM directors_dt AS d
WHERE d.avgWeightedRating IS NOT NULL
  AND d.validCount > 0
  AND d.profComboMask IS NOT NULL
GROUP BY d.profComboMask
HAVING COUNT(*) >= 30          -- at least 30 directors in the combo
   AND SUM(d.validCount) >= 30 -- at least 30 works across them
ORDER BY overallAverage DESC, directorCount DESC;


Test Optimization with Indexes

In [None]:
%%sql

-- Index on the grouping column of the bitmask query.
-- NOTE(review): ADD INDEX with a fixed name presumably fails when this cell is
-- run a second time without dropping the index first - confirm.
ALTER TABLE directors_dt
  ADD INDEX idx_profComboMask (profComboMask);

-- Average of the per-director averages for every profession combination.
-- Directors with a NULL mask are filtered out so that they do not form a
-- group of their own. The earlier versions inner-join the profession list
-- and therefore never report directors without a recorded profession.
SELECT
  d.profComboMask,
  AVG(d.avgWeightedRating) AS overallAverage,
  COUNT(*)                 AS directorCount,
  SUM(d.validCount)        AS totalWorks
FROM directors_dt AS d
WHERE d.avgWeightedRating IS NOT NULL
  AND d.validCount > 0
  AND d.profComboMask IS NOT NULL
GROUP BY d.profComboMask
HAVING COUNT(*) >= 30          -- at least 30 directors in the combo
   AND SUM(d.validCount) >= 30 -- at least 30 works across them
ORDER BY overallAverage DESC, directorCount DESC;

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

**Overall director search**

| **Version** | **Execution Time** | **Optimization Strategies Utilized** |
|----------|----------|----------|
| Naive | 59 seconds | Naive solution, utilized GROUP BY and ORDER BY with the AVG, SUM, and COUNT functions on (tconst, weightedRating, weightedRating, respectively). Used WHERE statements to filter names, genres, and types |
| Optimized | 59 seconds (no difference) | Final optimization, indexes made the time significantly worse and the naive solution was deemed to be optimal|

Naive 

In [None]:
%%sql

-- Step 1. Collect every (director, title) pair whose title is a tvEpisode or
-- movie, started between 1900 and 2025, and carries Drama or Talk-Show.
WITH matchedTitles AS (
  SELECT
      bridge.nconst,
      ft.tconst,
      ft.startYear,
      ft.weightedRating
  FROM title_director_bridge AS bridge
  INNER JOIN title_ft AS ft
    ON ft.tconst = bridge.tconst
  INNER JOIN type_dt AS ty
    ON ty.typeID = ft.typeID
  WHERE ty.titleType IN ('tvEpisode','movie')
    AND EXISTS (
      -- one matching genre row is enough
      SELECT 1
      FROM title_genre_bridge AS tgb
      INNER JOIN genre_dt AS gen
        ON gen.genreID = tgb.genreID
      WHERE tgb.tconst = ft.tconst
        AND gen.genreName IN ('Drama','Talk-Show')
    )
    AND ft.startYear BETWEEN 1900 AND 2025
)

-- Step 2. Summarize the matched titles for directors whose name contains
-- the search text. The age column is the death year, or the current year when
-- deathYear is NULL, minus the earliest matched startYear.
SELECT
  dir.nconst,
  dir.primaryName,
  COUNT(mt.tconst) AS totalWorks,
  SUM(mt.weightedRating IS NOT NULL) AS ratedWorks,
  AVG(mt.weightedRating) AS avgWeightedRating,
  (COALESCE(dir.deathYear, YEAR(CURDATE())) - MIN(mt.startYear)) AS age
FROM directors_dt AS dir
INNER JOIN matchedTitles AS mt
  ON mt.nconst = dir.nconst
WHERE dir.primaryName LIKE '%man%'
GROUP BY
  dir.nconst,
  dir.primaryName,
  COALESCE(dir.deathYear, YEAR(CURDATE()))
HAVING totalWorks >= 5
ORDER BY
  avgWeightedRating DESC,
  totalWorks DESC;

Final Optimization

In [None]:
%%sql

-- Final version of the director search. The statement is the same as the
-- naive one. It first narrows the (director, title) pairs by type, genre
-- and start year, then aggregates per director.
WITH matchedTitles AS (
  SELECT
      bridge.nconst,
      ft.tconst,
      ft.startYear,
      ft.weightedRating
  FROM title_director_bridge AS bridge
  INNER JOIN title_ft AS ft
    ON ft.tconst = bridge.tconst
  INNER JOIN type_dt AS ty
    ON ty.typeID = ft.typeID
  WHERE ty.titleType IN ('tvEpisode','movie')
    AND EXISTS (
      -- the title must carry at least one of the listed genres
      SELECT 1
      FROM title_genre_bridge AS tgb
      INNER JOIN genre_dt AS gen
        ON gen.genreID = tgb.genreID
      WHERE tgb.tconst = ft.tconst
        AND gen.genreName IN ('Drama','Talk-Show')
    )
    AND ft.startYear BETWEEN 1900 AND 2025
)

-- totalWorks counts all matched titles while ratedWorks counts only those
-- with a rating. AVG ignores the unrated ones.
SELECT
  dir.nconst,
  dir.primaryName,
  COUNT(mt.tconst) AS totalWorks,
  SUM(mt.weightedRating IS NOT NULL) AS ratedWorks,
  AVG(mt.weightedRating) AS avgWeightedRating,
  (COALESCE(dir.deathYear, YEAR(CURDATE())) - MIN(mt.startYear)) AS age
FROM directors_dt AS dir
INNER JOIN matchedTitles AS mt
  ON mt.nconst = dir.nconst
WHERE dir.primaryName LIKE '%man%'
GROUP BY
  dir.nconst,
  dir.primaryName,
  COALESCE(dir.deathYear, YEAR(CURDATE()))
HAVING totalWorks >= 5
ORDER BY
  avgWeightedRating DESC,
  totalWorks DESC;

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

**Director Timeline**

| **Version** | **Execution Time** | **Optimization Strategies Utilized** |
|----------|----------|----------|
| Naive | 457 seconds | Naive solution, utilized GROUP BY and ORDER BY with the AVG, SUM, and COUNT functions on (weightedRating, startYear or weightedRating, *, respectively). Used WHERE statements to filter director IDs, genres, and types. Slowed down because aggregations went before filtering |
| Optimized | 0.16 seconds (major improvement) | Final optimization, immediately filter the director before aggregating|

Naive

In [None]:
%%sql

-- Query 1. Work count and missing-data count for one director.
-- Deliberately naive. Every director is grouped first and the single target
-- director is only picked out afterwards in HAVING. The missing-data flag is
-- looked up again per row through a correlated subquery on title_ft.
SELECT
  dir.primaryName,
  COUNT(*) AS totalWorks,
  SUM((
        SELECT (probe.startYear IS NULL OR probe.weightedRating IS NULL)
        FROM title_ft AS probe
        WHERE probe.tconst = ft.tconst
      )) AS missingInfoCount
FROM directors_dt AS dir
INNER JOIN title_director_bridge AS bridge
  ON dir.nconst = bridge.nconst
INNER JOIN title_ft AS ft
  ON ft.tconst = bridge.tconst
GROUP BY
  dir.nconst,
  dir.primaryName
HAVING dir.nconst = 'nm2078274';

-- Query 2. Yearly timeline of the mean rating and rated-work count for the
-- director, limited to the selected types, genres and year range.
SELECT
  ft.startYear,
  AVG(ft.weightedRating) AS avgWeightedRating,
  COUNT(*) AS ratedWorks
FROM title_director_bridge AS bridge
INNER JOIN title_ft AS ft
  ON ft.tconst = bridge.tconst
INNER JOIN type_dt AS ty
  ON ty.typeID = ft.typeID
WHERE bridge.nconst = 'nm2078274'
  AND ft.startYear BETWEEN 1900 AND 2025
  AND ft.weightedRating IS NOT NULL
  AND ty.titleType IN ('tvEpisode', 'movie')
  AND EXISTS (
    SELECT 1
    FROM title_genre_bridge AS tgb
    INNER JOIN genre_dt AS gen
      ON gen.genreID = tgb.genreID
    WHERE tgb.tconst = ft.tconst
      AND gen.genreName IN ('Drama', 'Talk-Show')
  )
GROUP BY ft.startYear
ORDER BY ft.startYear;

-- Query 3. Single summary row over the same filters as the timeline.
SELECT
  dir.primaryName,
  COUNT(*) AS ratedWorksPeriod,
  AVG(ft.weightedRating) AS overallAvgPeriod
FROM title_director_bridge AS bridge
INNER JOIN title_ft AS ft
  ON ft.tconst = bridge.tconst
INNER JOIN directors_dt AS dir
  ON dir.nconst = bridge.nconst
INNER JOIN type_dt AS ty
  ON ty.typeID = ft.typeID
WHERE bridge.nconst = 'nm2078274'
  AND ft.startYear BETWEEN 1900 AND 2025
  AND ft.weightedRating IS NOT NULL
  AND ty.titleType IN ('tvEpisode', 'movie')
  AND EXISTS (
    SELECT 1
    FROM title_genre_bridge AS tgb
    INNER JOIN genre_dt AS gen
      ON gen.genreID = tgb.genreID
    WHERE tgb.tconst = ft.tconst
      AND gen.genreName IN ('Drama', 'Talk-Show')
  );



Final Optimization

In [None]:
%%sql

-- Query 1. Work count and missing-data count for one director.
-- The director is filtered in WHERE, before any aggregation takes place.
-- The explicit GROUP BY keeps the statement valid under ONLY_FULL_GROUP_BY
-- without relying on functional-dependency detection, and yields no row at
-- all for an unknown director, matching the naive version.
SELECT
  d.primaryName,
  COUNT(*) AS totalWorks,
  SUM(t.startYear IS NULL OR t.weightedRating IS NULL) AS missingInfoCount
FROM title_director_bridge AS tdb
JOIN title_ft AS t ON t.tconst = tdb.tconst
JOIN directors_dt AS d ON d.nconst = tdb.nconst
WHERE tdb.nconst = 'nm2078274'
GROUP BY d.nconst, d.primaryName;

-- Query 2. Yearly timeline of the mean rating and rated-work count for the
-- director, limited to the selected types, genres and year range.
SELECT
  t.startYear,
  AVG(t.weightedRating) AS avgWeightedRating,
  COUNT(*)              AS ratedWorks
FROM title_director_bridge AS tdb
JOIN title_ft AS t  ON t.tconst = tdb.tconst
JOIN type_dt AS td  ON td.typeID = t.typeID
WHERE tdb.nconst = 'nm2078274'
  AND t.startYear BETWEEN 1900 AND 2025
  AND t.weightedRating IS NOT NULL
  AND td.titleType IN ('tvEpisode', 'movie')
  AND EXISTS (
    -- the title must carry at least one of the listed genres
    SELECT 1
    FROM title_genre_bridge tg
    JOIN genre_dt g ON g.genreID = tg.genreID
    WHERE tg.tconst = t.tconst
      AND g.genreName IN ('Drama', 'Talk-Show')
  )
GROUP BY t.startYear
ORDER BY t.startYear;

-- Query 3. Overall summary for the period (same filters as the timeline).
-- Grouped explicitly for the same reasons as query 1.
SELECT
  d.primaryName,
  COUNT(*)              AS ratedWorksPeriod,
  AVG(t.weightedRating) AS overallAvgPeriod
FROM title_director_bridge AS tdb
JOIN title_ft AS t  ON t.tconst = tdb.tconst
JOIN directors_dt AS d ON d.nconst = tdb.nconst
JOIN type_dt AS td ON td.typeID = t.typeID
WHERE tdb.nconst = 'nm2078274'
  AND t.startYear BETWEEN 1900 AND 2025
  AND t.weightedRating IS NOT NULL
  AND td.titleType IN ('tvEpisode', 'movie')
  AND EXISTS (
    SELECT 1
    FROM title_genre_bridge tg
    JOIN genre_dt g ON g.genreID = tg.genreID
    WHERE tg.tconst = t.tconst
      AND g.genreName IN ('Drama', 'Talk-Show')
  )
GROUP BY d.nconst, d.primaryName;

